Example #1
def _start_server(cluster):
  args = [beeswax.conf.HIVE_SERVER_BIN.get()]

  env = cluster._mr2_env.copy()

  env.update({
    'HIVE_CONF_DIR': beeswax.conf.HIVE_CONF_DIR.get(),
    'HIVE_SERVER2_THRIFT_PORT': str(HIVE_SERVER_TEST_PORT),
    'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop') + '/share/hadoop/mapreduce',
    # Links created in jenkins script.
    # If missing classes when booting HS2, check here.
    'AUX_CLASSPATH':
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/hdfs/hadoop-hdfs.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/common/lib/hadoop-auth.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/common/hadoop-common.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar'
       ,
    'HADOOP_CLASSPATH': '',
  })

  if os.getenv("JAVA_HOME"):
    env["JAVA_HOME"] = os.getenv("JAVA_HOME")

  LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
  return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
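Note: every example on this page resolves filesystem paths through get_run_root. A minimal sketch of the assumed behaviour follows (a hypothetical stand-in, not Hue's actual implementation): the helper simply joins its arguments onto the directory the Hue build runs from.

import os

def get_run_root(*append):
    # Hypothetical stand-in: assume the run root is the current working
    # directory unless an (assumed) HUE_RUN_ROOT override is set.
    run_root = os.path.abspath(os.getenv('HUE_RUN_ROOT', '.'))
    return os.path.join(run_root, *append)

# get_run_root('ext/hadoop/hadoop')  ->  <run root>/ext/hadoop/hadoop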
Example #2
File: test_base.py Project: rnirmal/hue
def _start_mini_hs2(cluster):
    HIVE_CONF = cluster.hadoop_conf_dir
    finish = (
        beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
        beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
        beeswax.conf.HIVE_SERVER_BIN.set_for_testing(get_run_root("ext/hive/hive") + "/bin/hiveserver2"),
        beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF),
    )

    default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>
 <name>javax.jdo.option.ConnectionURL</name>
 <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
 <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>

<property>
 <name>hive.querylog.location</name>
 <value>%(querylog)s</value>
</property>

</configuration>
""" % {
        "root": cluster._tmpdir,
        "querylog": cluster.log_dir + "/hive",
    }

    file(HIVE_CONF + "/hive-site.xml", "w").write(default_xml)

    global _SHARED_HIVE_SERVER_PROCESS

    if _SHARED_HIVE_SERVER_PROCESS is None:
        p = _start_server(cluster)
        LOG.info("started")
        cluster.fs.do_as_superuser(cluster.fs.chmod, "/tmp", 01777)

        _SHARED_HIVE_SERVER_PROCESS = p

        def kill():
            LOG.info("Killing server (pid %d)." % p.pid)
            os.kill(p.pid, 9)
            p.wait()

        atexit.register(kill)

    def s():
        for f in finish:
            f()
        cluster.stop()

    return s
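The closure returned by _start_mini_hs2 undoes the beeswax.conf set_for_testing overrides and stops the cluster. A hedged usage sketch (run_queries is a placeholder, not part of the source):

cleanup = _start_mini_hs2(cluster)
try:
    run_queries()  # placeholder: whatever beeswax tests need the server
finally:
    cleanup()      # restore the patched conf values and stop the mini cluster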
Example #3
def _start_mini_hs2(cluster):
    HIVE_CONF = cluster.hadoop_conf_dir
    finish = (
        beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
        beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
        beeswax.conf.HIVE_SERVER_BIN.set_for_testing(
            get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
        beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF))

    default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>
 <name>javax.jdo.option.ConnectionURL</name>
 <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
 <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>

<property>
 <name>hive.querylog.location</name>
 <value>%(querylog)s</value>
</property>

</configuration>
""" % {
        'root': cluster._tmpdir,
        'querylog': cluster.log_dir + '/hive'
    }

    file(HIVE_CONF + '/hive-site.xml', 'w').write(default_xml)

    global _SHARED_HIVE_SERVER_PROCESS

    if _SHARED_HIVE_SERVER_PROCESS is None:
        p = _start_server(cluster)
        LOG.info("started")
        cluster.fs.do_as_superuser(cluster.fs.chmod, '/tmp', 0o1777)

        _SHARED_HIVE_SERVER_PROCESS = p

        def kill():
            LOG.info("Killing server (pid %d)." % p.pid)
            os.kill(p.pid, 9)
            p.wait()

        atexit.register(kill)

    def s():
        for f in finish:
            f()
        cluster.stop()

    return s
Example #4
def _start_server(cluster):
  args = [beeswax.conf.HIVE_SERVER_BIN.get()]

  env = cluster._mr2_env.copy()

  hadoop_cp_proc = subprocess.Popen(args=[get_run_root('ext/hadoop/hadoop') + '/bin/hadoop', 'classpath'], env=env, cwd=cluster._tmpdir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  hadoop_cp_proc.wait()
  hadoop_cp = hadoop_cp_proc.stdout.read().strip()

  env.update({
    'HADOOP_HOME': get_run_root('ext/hadoop/hadoop'), # Used only by Hive for some reason
    'HIVE_CONF_DIR': beeswax.conf.HIVE_CONF_DIR.get(),
    'HIVE_SERVER2_THRIFT_PORT': str(HIVE_SERVER_TEST_PORT),
    'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop') + '/share/hadoop/mapreduce',
    # Links created in jenkins script.
    # If missing classes when booting HS2, check here.
    'AUX_CLASSPATH':
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/hdfs/hadoop-hdfs.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/common/lib/hadoop-auth.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/common/hadoop-common.jar'
       + ':' +
       get_run_root('ext/hadoop/hadoop') + '/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar'
       ,
    'HADOOP_CLASSPATH': hadoop_cp,
  })

  if os.getenv("JAVA_HOME"):
    env["JAVA_HOME"] = os.getenv("JAVA_HOME")

  LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
  return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
Example #5
File: views.py Project: maduhu/HDP-hue
def _get_components():
  components = []
  try:
    components += _read_versions(os.path.join(get_run_root(), "VERSIONS"))
    extra_versions_path = os.path.join(get_var_root(), "EXTRA_VERSIONS")
    if os.path.exists(extra_versions_path):
      components += _read_versions(extra_versions_path)
  except ValueError:  # Exception:
    components = [
      ('HDP', "2.0.6"),
      ('Hadoop', "1.2.0.1.3.0.0-107"),
      ('HCatalog', "0.11.0.1.3.0.0-107"),
      ('Pig', "0.11.1.1.3.0.0-107"),
      ('Hive', "0.11.0.1.3.0.0-107"),
      ('Oozie', "3.3.2.1.3.0.0-107")
    ]

  if conf.TUTORIALS_INSTALLED.get():
    components.insert(0, ('Tutorials', _get_tutorials_version()))
    # components.insert(0, ("Sandbox", conf.SANDBOX_VERSION.get()))
  return components, HUE_VERSION
Example #6
File: test_base.py Project: rnirmal/hue
def _start_server(cluster):
    args = [beeswax.conf.HIVE_SERVER_BIN.get()]

    env = cluster._mr2_env.copy()

    hadoop_cp_proc = subprocess.Popen(
        args=[get_run_root("ext/hadoop/hadoop") + "/bin/hadoop", "classpath"],
        env=env,
        cwd=cluster._tmpdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    hadoop_cp_proc.wait()
    hadoop_cp = hadoop_cp_proc.stdout.read().strip()

    env.update(
        {
            "HADOOP_HOME": get_run_root("ext/hadoop/hadoop"),  # Used only by Hive for some reason
            "HIVE_CONF_DIR": beeswax.conf.HIVE_CONF_DIR.get(),
            "HIVE_SERVER2_THRIFT_PORT": str(HIVE_SERVER_TEST_PORT),
            "HADOOP_MAPRED_HOME": get_run_root("ext/hadoop/hadoop") + "/share/hadoop/mapreduce",
            # Links created in jenkins script.
            # If missing classes when booting HS2, check here.
            "AUX_CLASSPATH": get_run_root("ext/hadoop/hadoop")
            + "/share/hadoop/hdfs/hadoop-hdfs.jar"
            + ":"
            + get_run_root("ext/hadoop/hadoop")
            + "/share/hadoop/common/lib/hadoop-auth.jar"
            + ":"
            + get_run_root("ext/hadoop/hadoop")
            + "/share/hadoop/common/hadoop-common.jar"
            + ":"
            + get_run_root("ext/hadoop/hadoop")
            + "/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar",
            "HADOOP_CLASSPATH": hadoop_cp,
        }
    )

    if os.getenv("JAVA_HOME"):
        env["JAVA_HOME"] = os.getenv("JAVA_HOME")

    LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
    return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
Example #7
 def _get_hdfs_bin(self, env):
     try:
         return env['HDFS_BIN']
     except KeyError:
         return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin',
                             'hdfs')
Example #8
 def _get_yarn_bin(self, env):
     try:
         return env['YARN_BIN']
     except KeyError:
         return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin',
                             'yarn')
Example #9
 def _get_mapred_bin(self, env):
     try:
         return env['MAPRED_BIN']
     except KeyError:
         return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin',
                             'mapred')
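Examples #7-#9 share one pattern: prefer an explicit *_BIN entry from the supplied environment, otherwise fall back to the binary shipped under the run root. The try/except KeyError is equivalent to dict.get with a default, as in this illustrative sketch (resolve_hdfs_bin is not part of the source):

import os

def resolve_hdfs_bin(env, hadoop_home):
    # Same effect as the try/except KeyError in Example #7, written with dict.get.
    return env.get('HDFS_BIN', os.path.join(hadoop_home, 'bin', 'hdfs'))

# e.g. resolve_hdfs_bin(os.environ, get_run_root('ext/hadoop/hadoop'))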
Example #10
    def start(self):
        LOG.info("Using temporary directory: %s" % (self._tmpdir, ))

        if not os.path.exists(self.hadoop_conf_dir):
            os.mkdir(self.hadoop_conf_dir)

        self._log_dir = self._tmppath('logs')
        if not os.path.exists(self._log_dir):
            os.mkdir(self._log_dir)

        self._local_dir = self._tmppath('local')
        if not os.path.exists(self._local_dir):
            os.mkdir(self._local_dir)

        self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
        self._write_core_site()
        self._write_hdfs_site()
        self._write_yarn_site()
        self._write_mapred_site()

        # More stuff to setup in the environment
        env = {
            'YARN_HOME': get_run_root('ext/hadoop/hadoop'),
            'HADOOP_COMMON_HOME': get_run_root('ext/hadoop/hadoop'),
            'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop'),
            'HADOOP_HDFS_HOME': get_run_root('ext/hadoop/hadoop'),
            'HADOOP_CONF_DIR': self.hadoop_conf_dir,
            'YARN_CONF_DIR': self.hadoop_conf_dir,
            'HADOOP_HEAPSIZE': '128',
            'HADOOP_LOG_DIR': self._log_dir,
            'USER': self.superuser,
            'LANG': "en_US.UTF-8",
            'PATH': os.environ['PATH'],
        }

        if "JAVA_HOME" in os.environ:
            env['JAVA_HOME'] = os.environ['JAVA_HOME']

        LOG.debug("Hadoop Environment:\n" +
                  "\n".join([str(x) for x in sorted(env.items())]))

        # Format HDFS
        self._format(self.hadoop_conf_dir, env)

        # Run them
        self._nn_proc = self._start_daemon('namenode', self.hadoop_conf_dir,
                                           env)
        self._dn_proc = self._start_daemon('datanode', self.hadoop_conf_dir,
                                           env)

        # Make sure they're running
        deadline = time.time() + STARTUP_DEADLINE
        while not self._is_hdfs_ready(env):
            if time.time() > deadline:
                self.stop()
                raise RuntimeError('%s is taking too long to start' % (self, ))
            time.sleep(5)

        # Start MR2
        self._start_mr2(env)

        # Create HDFS directories
        if not self.fs.exists('/tmp'):
            self.fs.do_as_superuser(self.mkdir, '/tmp', 01777)
        self.fs.do_as_superuser(self.fs.chmod, '/tmp', 01777)

        self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn', 01777)
        self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn', 01777)

        self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging',
                                01777)
        self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging',
                                01777)

        self.fs.do_as_superuser(self.fs.mkdir,
                                '/tmp/hadoop-yarn/staging/history', 01777)
        self.fs.do_as_superuser(self.fs.chmod,
                                '/tmp/hadoop-yarn/staging/history', 01777)

        self.fs.do_as_superuser(self.fs.mkdir, '/var/log/hadoop-yarn/apps',
                                01777)
        self.fs.do_as_superuser(self.fs.chmod, '/var/log/hadoop-yarn/apps',
                                01777)

        self.fs.create_home_dir('/user/test')
        self.fs.create_home_dir('/user/hue')
Example #11
File: test_base.py Project: bugcy013/hue
def get_shared_beeswax_server():
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER
  if _SHARED_HIVE_SERVER is None:

    cluster = pseudo_hdfs4.shared_cluster()

    HIVE_CONF = cluster.hadoop_conf_dir
    finish = (
      beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
      beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
      beeswax.conf.HIVE_SERVER_BIN.set_for_testing(get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
      beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF)
    )

    default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>

 <property>
   <name>hive.server2.enable.impersonation</name>
   <value>false</value>
 </property>

<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>

</configuration>
""" % {'root': cluster._tmpdir, 'querylog': cluster.log_dir + '/hive'}

    file(HIVE_CONF + '/hive-site.xml', 'w').write(default_xml)

    global _SHARED_HIVE_SERVER_PROCESS

    if _SHARED_HIVE_SERVER_PROCESS is None:
      p = _start_server(cluster)
      LOG.info("started")

      _SHARED_HIVE_SERVER_PROCESS = p
      def kill():
        LOG.info("Killing server (pid %d)." % p.pid)
        os.kill(p.pid, 9)
        p.wait()
      atexit.register(kill)

      start = time.time()
      started = False
      sleep = 0.001

      make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server_config()
      db = dbms.get(user, query_server)

      while not started and time.time() - start < 20.0:
        try:
          db.open_session(user)
          started = True
          break
        except Exception, e:
          LOG.info('HiveServer2 server status not started yet after: %s' % e)
          time.sleep(sleep)
          sleep *= 2

      if not started:
        raise Exception("Server took too long to come up.")

    def s():
      for f in finish:
        f()
      cluster.stop()

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
Example #12
 def _get_mapred_bin(self, env):
     try:
         return env["MAPRED_BIN"]
     except KeyError:
         return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "mapred")
Example #13
    def start(self):
        LOG.info("Using temporary directory: %s" % (self._tmpdir,))

        if not os.path.exists(self.hadoop_conf_dir):
            os.mkdir(self.hadoop_conf_dir)

        self._log_dir = self._tmppath("logs")
        if not os.path.exists(self._log_dir):
            os.mkdir(self._log_dir)

        self._local_dir = self._tmppath("local")
        if not os.path.exists(self._local_dir):
            os.mkdir(self._local_dir)

        self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
        self._write_core_site()
        self._write_hdfs_site()
        self._write_yarn_site()
        self._write_mapred_site()

        # More stuff to setup in the environment
        env = {
            "YARN_HOME": get_run_root("ext/hadoop/hadoop"),
            "HADOOP_COMMON_HOME": get_run_root("ext/hadoop/hadoop"),
            "HADOOP_MAPRED_HOME": get_run_root("ext/hadoop/hadoop"),
            "HADOOP_HDFS_HOME": get_run_root("ext/hadoop/hadoop"),
            "HADOOP_CONF_DIR": self.hadoop_conf_dir,
            "YARN_CONF_DIR": self.hadoop_conf_dir,
            "HADOOP_HEAPSIZE": "128",
            "HADOOP_LOG_DIR": self._log_dir,
            "USER": self.superuser,
            "LANG": "en_US.UTF-8",
            "PATH": os.environ["PATH"],
        }

        if "JAVA_HOME" in os.environ:
            env["JAVA_HOME"] = os.environ["JAVA_HOME"]

        LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())]))

        # Format HDFS
        self._format(self.hadoop_conf_dir, env)

        # Run them
        self._nn_proc = self._start_daemon("namenode", self.hadoop_conf_dir, env)
        self._dn_proc = self._start_daemon("datanode", self.hadoop_conf_dir, env)

        # Make sure they're running
        deadline = time.time() + STARTUP_DEADLINE
        while not self._is_hdfs_ready(env):
            if time.time() > deadline:
                self.stop()
                raise RuntimeError("%s is taking too long to start" % (self,))
            time.sleep(5)

        # Start MR2
        self._start_mr2(env)

        # Create HDFS directories
        if not self.fs.exists("/tmp"):
            self.fs.do_as_superuser(self.mkdir, "/tmp", 01777)
        self.fs.do_as_superuser(self.fs.chmod, "/tmp", 01777)

        self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn", 01777)
        self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn", 01777)

        self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn/staging", 01777)
        self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn/staging", 01777)

        self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn/staging/history", 01777)
        self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn/staging/history", 01777)

        self.fs.do_as_superuser(self.fs.mkdir, "/var/log/hadoop-yarn/apps", 01777)
        self.fs.do_as_superuser(self.fs.chmod, "/var/log/hadoop-yarn/apps", 01777)

        self.fs.create_home_dir("/user/test")
        self.fs.create_home_dir("/user/hue")
Example #14
 def _get_mapred_bin(self, env):
   try:
     return env['MAPRED_BIN']
   except KeyError:
     return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'mapred')
Example #15
def get_shared_beeswax_server():
    global _SHARED_HIVE_SERVER
    global _SHARED_HIVE_SERVER_CLOSER
    if _SHARED_HIVE_SERVER is None:

        cluster = pseudo_hdfs4.shared_cluster()

        HIVE_CONF = cluster.hadoop_conf_dir
        finish = (beeswax.conf.HIVE_SERVER_HOST.set_for_testing(
            get_localhost_name()),
                  beeswax.conf.HIVE_SERVER_PORT.set_for_testing(
                      HIVE_SERVER_TEST_PORT),
                  beeswax.conf.HIVE_SERVER_BIN.set_for_testing(
                      get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
                  beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF))

        default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>

 <property>
   <name>hive.server2.enable.impersonation</name>
   <value>false</value>
 </property>

<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>

</configuration>
""" % {
            'root': cluster._tmpdir,
            'querylog': cluster.log_dir + '/hive'
        }

        file(HIVE_CONF + '/hive-site.xml', 'w').write(default_xml)

        global _SHARED_HIVE_SERVER_PROCESS

        if _SHARED_HIVE_SERVER_PROCESS is None:
            p = _start_server(cluster)
            LOG.info("started")
            cluster.fs.do_as_superuser(cluster.fs.chmod, '/tmp', 01777)

            _SHARED_HIVE_SERVER_PROCESS = p

            def kill():
                LOG.info("Killing server (pid %d)." % p.pid)
                os.kill(p.pid, 9)
                p.wait()

            atexit.register(kill)

            start = time.time()
            started = False
            sleep = 1

            make_logged_in_client()
            user = User.objects.get(username='******')
            query_server = get_query_server_config()
            db = dbms.get(user, query_server)

            while not started and time.time() - start <= 30:
                try:
                    db.open_session(user)
                    started = True
                    break
                except Exception, e:
                    LOG.info(
                        'HiveServer2 server status not started yet after: %s' %
                        e)
                    time.sleep(sleep)

            if not started:
                raise Exception("Server took too long to come up.")

        def s():
            for f in finish:
                f()
            cluster.stop()

        _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
Example #16
  def start(self):
    LOG.info("Using temporary directory: %s" % (self._tmpdir,))

    if not os.path.exists(self.hadoop_conf_dir):
      os.mkdir(self.hadoop_conf_dir)

    self._log_dir = self._tmppath('logs')
    if not os.path.exists(self._log_dir):
      os.mkdir(self._log_dir)

    self._local_dir = self._tmppath('local')
    if not os.path.exists(self._local_dir):
      os.mkdir(self._local_dir)

    self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
    self._write_core_site()
    self._write_hdfs_site()
    self._write_yarn_site()
    self._write_mapred_site()

    # More stuff to setup in the environment
    env = {
      'YARN_HOME': get_run_root('ext/hadoop/hadoop'),
      'HADOOP_COMMON_HOME': get_run_root('ext/hadoop/hadoop'),
      'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop'),
      'HADOOP_HDFS_HOME': get_run_root('ext/hadoop/hadoop'),

      'HADOOP_CONF_DIR': self.hadoop_conf_dir,
      'YARN_CONF_DIR': self.hadoop_conf_dir,

      'HADOOP_HEAPSIZE': '128',
      'HADOOP_LOG_DIR': self._log_dir,
      'USER': self.superuser,
      'LANG': "en_US.UTF-8",
      'PATH': os.environ['PATH'],
    }

    if "JAVA_HOME" in os.environ:
      env['JAVA_HOME'] = os.environ['JAVA_HOME']

    LOG.debug("Hadoop Environment:\n" + "\n".join([ str(x) for x in sorted(env.items()) ]))

    # Format HDFS
    self._format(self.hadoop_conf_dir, env)

    # Run them
    self._nn_proc = self._start_daemon('namenode', self.hadoop_conf_dir, env)
    self._dn_proc = self._start_daemon('datanode', self.hadoop_conf_dir, env)

    # Make sure they're running
    deadline = time.time() + STARTUP_DEADLINE
    while not self._is_hdfs_ready(env):
      if time.time() > deadline:
        self.stop()
        raise RuntimeError('%s is taking too long to start' % (self,))
      time.sleep(5)

    # Start MR2
    self._start_mr2(env)

    # Create HDFS directories
    if not self.fs.exists('/tmp'):
      self.fs.do_as_superuser(self.mkdir, '/tmp', 01777)
    self.fs.do_as_superuser(self.fs.chmod, '/tmp', 01777)

    self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn', 01777)
    self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn', 01777)

    self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging', 01777)
    self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging', 01777)

    self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging/history', 01777)
    self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging/history', 01777)

    self.fs.do_as_superuser(self.fs.mkdir, '/var/log/hadoop-yarn/apps', 01777)
    self.fs.do_as_superuser(self.fs.chmod, '/var/log/hadoop-yarn/apps', 01777)

    self.fs.do_as_user('test', self.fs.create_home_dir, '/user/test')
    self.fs.do_as_user('hue', self.fs.create_home_dir, '/user/hue')
Example #17
 def _get_yarn_bin(self, env):
     try:
         return env["YARN_BIN"]
     except KeyError:
         return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "yarn")
Example #18
class SqoopServerProvider(object):
    """
  Setup a Sqoop server.
  """
    TEST_PORT = '19080'
    TEST_SHUTDOWN_PORT = '19081'
    HOME = get_run_root('ext/sqoop/sqoop')

    requires_hadoop = True
    integration = True

    is_running = False

    @classmethod
    def setup_class(cls):

        if not is_live_cluster():
            raise SkipTest()

        cls.cluster = pseudo_hdfs4.shared_cluster()
        cls.client, callback = cls.get_shared_server()
        cls.shutdown = [callback]

    @classmethod
    def initialize(cls, tmpdir):
        hadoop_conf_dir = os.path.join(tmpdir, 'conf')
        base_dir = os.path.join(tmpdir, 'sqoop')
        log_dir = os.path.join(base_dir, 'logs')
        conf_dir = os.path.join(base_dir, 'conf')
        old_conf_dir = os.path.join(SqoopServerProvider.HOME, 'server/conf')

        if not os.path.exists(hadoop_conf_dir):
            os.mkdir(hadoop_conf_dir)
        if not os.path.exists(base_dir):
            os.mkdir(base_dir)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)
        if not os.path.exists(conf_dir):
            os.mkdir(conf_dir)

        for _file in ('sqoop.properties', 'sqoop_bootstrap.properties'):
            with open(os.path.join(old_conf_dir, _file), 'r') as _original:
                with open(os.path.join(conf_dir, _file), 'w') as _new:
                    for _line in _original:
                        line = _line.replace('${test.log.dir}', log_dir)
                        line = line.replace('${test.hadoop.conf.dir}',
                                            hadoop_conf_dir)
                        line = line.replace('${test.base.dir}', base_dir)
                        _new.write(line)
        # This sets JAVA_OPTS with a sqoop conf... we need to use our own.
        os.chmod(
            os.path.join(SqoopServerProvider.HOME, 'server/bin/setenv.sh'), 0)

    @classmethod
    def start(cls, cluster):
        """
    Start the Sqoop server process.
    """
        SqoopServerProvider.initialize(cluster._tmpdir)

        env = os.environ
        env['CATALINA_HOME'] = os.path.join(SqoopServerProvider.HOME, 'server')
        env['CATALINA_PID'] = os.path.join(cluster._tmpdir, 'sqoop/sqoop.pid')
        env['CATALINA_OPTS'] = """
      -Dtest.log.dir=%(log_dir)s
      -Dtest.host.local=%(host)s
      -Dsqoop.http.port=%(http_port)s
      -Dsqoop.admin.port=%(admin_port)s
    """ % {
            'log_dir': os.path.join(cluster._tmpdir, 'sqoop/logs'),
            'host': socket.getfqdn(),
            'http_port': SqoopServerProvider.TEST_PORT,
            'admin_port': SqoopServerProvider.TEST_SHUTDOWN_PORT
        }
        env['SQOOP_HTTP_PORT'] = SqoopServerProvider.TEST_PORT
        env['SQOOP_ADMIN_PORT'] = SqoopServerProvider.TEST_SHUTDOWN_PORT
        env['JAVA_OPTS'] = '-Dsqoop.config.dir=%s' % os.path.join(
            cluster._tmpdir, 'sqoop/conf')
        args = [
            os.path.join(SqoopServerProvider.HOME, 'bin/sqoop.sh'), 'server',
            'start'
        ]

        LOG.info("Executing %s, env %s, cwd %s" %
                 (repr(args), repr(env), cluster._tmpdir))
        process = subprocess.Popen(args=args,
                                   env=env,
                                   cwd=cluster._tmpdir,
                                   stdin=subprocess.PIPE)
        return process

    @classmethod
    def get_shared_server(cls,
                          username='******',
                          language=settings.LANGUAGE_CODE):
        callback = lambda: None

        with service_lock:
            if not SqoopServerProvider.is_running:
                # Setup
                cluster = pseudo_hdfs4.shared_cluster()

                if is_live_cluster():
                    finish = ()
                else:
                    LOG.info(
                        '\nStarting a Mini Sqoop. Requires "tools/jenkins/jenkins.sh" to have been run previously.\n'
                    )

                    finish = (SERVER_URL.set_for_testing(
                        "http://%s:%s/sqoop" %
                        (socket.getfqdn(), SqoopServerProvider.TEST_PORT)), )

                    p = cls.start(cluster)

                    def kill():
                        with open(
                                os.path.join(cluster._tmpdir,
                                             'sqoop/sqoop.pid'),
                                'r') as pidfile:
                            pid = pidfile.read()
                            LOG.info("Killing Sqoop server (pid %s)." % pid)
                            os.kill(int(pid), 9)
                            p.wait()

                    atexit.register(kill)

                start = time.time()
                started = False
                sleep = 0.01

                client = SqoopClient(SERVER_URL.get(), username, language)

                while not started and time.time() - start < 60.0:
                    LOG.info('Check Sqoop status...')
                    try:
                        version = client.get_version()
                    except RestException as e:
                        LOG.exception(
                            'Exception fetching the Sqoop server version')

                        # Don't loop if we had an authentication error.
                        if e.code == 401:
                            raise
                    except Exception as e:
                        LOG.info('Sqoop server not started yet: %s' % e)
                    else:
                        if version:
                            started = True
                            break

                    time.sleep(sleep)
                    sleep *= 2

                if not started:
                    raise Exception("Sqoop server took too long to come up.")

                def shutdown():
                    for f in finish:
                        f()
                    cluster.stop()

                callback = shutdown

                SqoopServerProvider.is_running = True
            else:
                client = SqoopClient(SERVER_URL.get(), username, language)

            return client, callback
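get_shared_server returns a (client, shutdown-callback) pair, mirroring what setup_class stores in cls.client and cls.shutdown. A hedged usage sketch:

client, stop_sqoop = SqoopServerProvider.get_shared_server()
try:
    print(client.get_version())   # the same readiness probe the loop above uses
finally:
    stop_sqoop()                  # no-op on a live cluster; otherwise undoes the
                                  # SERVER_URL override and stops the mini cluster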
Example #19
        'APP_DIRS':
        True,
    },
]

# Desktop doesn't use an auth profile module, because
# it doesn't mesh very well with the notion
# of having multiple apps.  If your app needs
# to store data related to users, it should
# manage its own table with an appropriate foreign key.
AUTH_PROFILE_MODULE = None

LOGIN_REDIRECT_URL = "/"
LOGOUT_REDIRECT_URL = "/"  # For djangosaml2 bug.

PYLINTRC = get_run_root('.pylintrc')

# Custom CSRF Failure View
CSRF_FAILURE_VIEW = 'desktop.views.csrf_failure'

############################################################
# Part 4: Installation of apps
############################################################

_config_dir = os.getenv("HUE_CONF_DIR", get_desktop_root("conf"))

# Libraries are loaded and configured before the apps
appmanager.load_libs()
_lib_conf_modules = [
    dict(module=app.conf, config_key=None) for app in appmanager.DESKTOP_LIBS
    if app.conf is not None
]
Example #20
class OozieServerProvider(object):
    """
  Set up an Oozie server.
  """
    OOZIE_TEST_PORT = '18001'
    OOZIE_HOME = get_run_root('ext/oozie/oozie')

    requires_hadoop = True
    is_oozie_running = False

    @classmethod
    def setup_class(cls):
        cls.cluster = pseudo_hdfs4.shared_cluster()
        cls.oozie, callback = cls._get_shared_oozie_server()
        cls.shutdown = [callback]

    @classmethod
    def wait_until_completion(cls, oozie_jobid, timeout=300.0, step=5):
        job = cls.oozie.get_job(oozie_jobid)
        start = time.time()

        while job.is_running() and (time.time() - start) < timeout:
            time.sleep(step)
            LOG.info('Checking status of %s...' % oozie_jobid)
            job = cls.oozie.get_job(oozie_jobid)
            LOG.info('[%d] Status after %d: %s' %
                     (time.time(), time.time() - start, job))

        logs = cls.oozie.get_job_log(oozie_jobid)

        if job.is_running():
            msg = "[%d] %s took more than %d to complete: %s" % (
                time.time(), oozie_jobid, timeout, logs)
            LOG.info(msg)
            raise Exception(msg)
        else:
            LOG.info('[%d] Job %s took %d: %s' %
                     (time.time(), job.id, time.time() - start, logs))

        return job

    @classmethod
    def _setup_conf_dir(cls, cluster):
        original_oozie_conf_dir = '%s/conf' % OozieServerProvider.OOZIE_HOME
        shutil.copytree(original_oozie_conf_dir,
                        cluster._tmppath('conf/oozie'))
        cls._write_oozie_site(cluster)

    @classmethod
    def _write_oozie_site(cls, cluster):
        oozie_configs = {
            'oozie.service.ProxyUserService.proxyuser.hue.hosts':
            '*',
            'oozie.service.ProxyUserService.proxyuser.hue.groups':
            '*',
            'oozie.service.HadoopAccessorService.hadoop.configurations':
            '*=%s' % cluster._tmppath('conf'),
            'oozie.db.schema.name':
            'oozie',
            'oozie.data.dir':
            cluster._tmppath('oozie_tmp_dir'),
            'oozie.service.JPAService.create.db.schema':
            'false',
            'oozie.service.JPAService.jdbc.driver':
            'org.apache.derby.jdbc.EmbeddedDriver',
            'oozie.service.JPAService.jdbc.url':
            'jdbc:derby:${oozie.data.dir}/${oozie.db.schema.name}-db;create=true',
            'oozie.service.JPAService.jdbc.username':
            '******',
            'oozie.service.JPAService.jdbc.password':
            '',
            'oozie.service.SchemaService.wf.ext.schemas':
            '''shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd,
            hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd,
            sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,
            oozie-sla-0.1.xsd,oozie-sla-0.2.xsd,
            hive2-action-0.1.xsd,
            spark-action-0.1.xsd''',
            'oozie.service.ActionService.executor.ext.classes':
            '''org.apache.oozie.action.email.EmailActionExecutor,
            org.apache.oozie.action.hadoop.HiveActionExecutor,
            org.apache.oozie.action.hadoop.ShellActionExecutor,
            org.apache.oozie.action.hadoop.SqoopActionExecutor,
            org.apache.oozie.action.hadoop.DistcpActionExecutor,
            org.apache.oozie.action.hadoop.Hive2ActionExecutor,
            org.apache.oozie.action.ssh.SshActionExecutor,
            org.apache.oozie.action.oozie.SubWorkflowActionExecutor,
            org.apache.oozie.action.hadoop.SparkActionExecutor''',
            'oozie.service.coord.normal.default.timeout':
            120
        }
        write_config(oozie_configs,
                     cluster._tmppath('conf/oozie/oozie-site.xml'))

    @classmethod
    def _start_oozie(cls, cluster):
        """
    Start oozie process.
    """
        OozieServerProvider._setup_conf_dir(cluster)

        args = [OozieServerProvider.OOZIE_HOME + '/bin/oozied.sh', 'run']
        env = os.environ
        env['OOZIE_DATA'] = cluster._tmppath('oozie_tmp_dir')
        env['OOZIE_HTTP_PORT'] = OozieServerProvider.OOZIE_TEST_PORT
        conf_dir = os.path.join(cluster.log_dir, 'oozie')
        os.mkdir(conf_dir)
        env['OOZIE_LOG'] = conf_dir
        env['OOZIE_CONFIG'] = cluster._tmppath('conf/oozie')

        LOG.info("Executing %s, env %s, cwd %s" %
                 (repr(args), repr(env), cluster._tmpdir))
        process = subprocess.Popen(args=args,
                                   env=env,
                                   cwd=cluster._tmpdir,
                                   stdin=subprocess.PIPE)
        return process

    @classmethod
    def _reset_oozie(cls, cluster):
        env = os.environ

        env['OOZIE_DATA'] = cluster._tmppath('oozie_tmp_dir')

        args = [
            'rm', '-r',
            '%s/data/oozie-db' % cluster._tmppath('oozie_tmp_dir')
        ]
        LOG.info("Executing %s, env %s" % (args, env))
        subprocess.call(args, env=env)

        args = [
            OozieServerProvider.OOZIE_HOME + '/bin/ooziedb.sh', 'create',
            '-sqlfile', 'oozie.sql', '-run'
        ]
        LOG.info("Executing %s, env %s" % (args, env))
        subprocess.call(args, env=env)

    @classmethod
    def _setup_sharelib(cls):
        LOG.info("Copying Oozie sharelib")
        user_home = cls.cluster.fs.do_as_user(getpass.getuser(),
                                              cls.cluster.fs.get_home_dir)
        oozie_share_lib = user_home + '/share'
        cls.cluster.fs.do_as_user(getpass.getuser(),
                                  cls.cluster.fs.create_home_dir)

        env = os.environ
        args = [
            OozieServerProvider.OOZIE_HOME + '/bin/oozie-setup.sh', 'sharelib',
            'create', '-fs', cls.cluster.fs.fs_defaultfs, '-locallib',
            OozieServerProvider.OOZIE_HOME + '/oozie-sharelib.tar.gz'
        ]
        LOG.info("Executing %s, env %s" % (args, env))
        subprocess.call(args, env=env)
        LOG.info("Oozie sharelib copied to %s" % oozie_share_lib)

    @classmethod
    def _get_shared_oozie_server(cls):
        callback = lambda: None

        _oozie_lock.acquire()

        try:
            if not OozieServerProvider.is_oozie_running:
                cluster = pseudo_hdfs4.shared_cluster()

                if is_live_cluster():

                    def shutdown():
                        pass
                else:
                    LOG.info(
                        '\nStarting a Mini Oozie. Requires "tools/jenkins/jenkins.sh" to have been run previously.\n'
                    )
                    LOG.info(
                        'See https://issues.cloudera.org/browse/HUE-861\n')

                    finish = (OOZIE_URL.set_for_testing(
                        "http://%s:%s/oozie" %
                        (socket.getfqdn(),
                         OozieServerProvider.OOZIE_TEST_PORT)), )

                    # Setup
                    cls._setup_sharelib()
                    cls._reset_oozie(cluster)

                    p = cls._start_oozie(cluster)

                    def kill():
                        LOG.info("Killing Oozie server (pid %d)." % p.pid)
                        os.kill(p.pid, 9)
                        p.wait()

                    atexit.register(kill)

                    def shutdown():
                        for f in finish:
                            f()
                        cluster.stop()

                start = time.time()
                started = False
                sleep = 0.01

                while not started and time.time() - start < 30.0:
                    status = None
                    try:
                        LOG.info('Check Oozie status...')
                        status = get_oozie(
                            cluster.superuser).get_oozie_status()
                        if status['systemMode'] == 'NORMAL':
                            started = True
                            break
                        time.sleep(sleep)
                        sleep *= 2
                    except Exception, e:
                        LOG.info(
                            'Oozie server status not NORMAL yet: %s - %s' %
                            (status, e))
                        time.sleep(sleep)
                        sleep *= 2
                        pass

                if not started:
                    raise Exception("Oozie server took too long to come up.")

                OozieServerProvider.is_oozie_running = True
                callback = shutdown
        finally:
            _oozie_lock.release()

        cluster = pseudo_hdfs4.shared_cluster()
        return get_oozie(cluster.superuser), callback
Example #21
 def _get_yarn_bin(self, env):
   try:
     return env['YARN_BIN']
   except KeyError:
     return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'yarn')
Example #22
 def _get_hdfs_bin(self, env):
     try:
         return env["HDFS_BIN"]
     except KeyError:
         return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "hdfs")
Example #23
class OozieServerProvider(object):
    """
  Set up an Oozie server.
  """
    OOZIE_TEST_PORT = '18080'
    OOZIE_HOME = get_run_root('ext/oozie/oozie')

    requires_hadoop = True
    is_oozie_running = False

    @classmethod
    def setup_class(cls):
        cls.cluster = pseudo_hdfs4.shared_cluster()
        cls.oozie, callback = cls._get_shared_oozie_server()
        cls.shutdown = [callback]

    @classmethod
    def wait_until_completion(cls, oozie_jobid, timeout=300.0, step=5):
        job = cls.oozie.get_job(oozie_jobid)
        start = time.time()

        while job.is_running() and time.time() - start < timeout:
            time.sleep(step)
            LOG.info('Checking status of %s...' % oozie_jobid)
            job = cls.oozie.get_job(oozie_jobid)
            LOG.info('[%d] Status after %d: %s' %
                     (time.time(), time.time() - start, job))

        logs = cls.oozie.get_job_log(oozie_jobid)

        if job.is_running():
            msg = "[%d] %s took more than %d to complete: %s" % (
                time.time(), oozie_jobid, timeout, logs)
            LOG.info(msg)
            raise Exception(msg)
        else:
            LOG.info('[%d] Job %s took %d: %s' %
                     (time.time(), job.id, time.time() - start, logs))

        return job

    @classmethod
    def _start_oozie(cls, cluster):
        """
    Start oozie process.
    """
        args = [OozieServerProvider.OOZIE_HOME + '/bin/oozied.sh', 'run']
        env = os.environ
        env['OOZIE_HTTP_PORT'] = OozieServerProvider.OOZIE_TEST_PORT
        conf_dir = os.path.join(cluster.log_dir, 'oozie')
        os.mkdir(conf_dir)
        env['OOZIE_LOG'] = conf_dir

        LOG.info("Executing %s, env %s, cwd %s" %
                 (repr(args), repr(env), cluster._tmpdir))
        process = subprocess.Popen(args=args,
                                   env=env,
                                   cwd=cluster._tmpdir,
                                   stdin=subprocess.PIPE)
        return process

    @classmethod
    def _reset_oozie(cls):
        env = os.environ

        args = ['rm', '-r', OozieServerProvider.OOZIE_HOME + '/data/oozie-db']
        LOG.info("Executing %s, env %s" % (args, env))
        subprocess.call(args, env=env)

        args = [
            OozieServerProvider.OOZIE_HOME + '/bin/ooziedb.sh', 'create',
            '-sqlfile', 'oozie.sql', '-run'
        ]
        LOG.info("Executing %s, env %s" % (args, env))
        subprocess.call(args, env=env)

    @classmethod
    def _setup_sharelib(cls):
        cls.cluster.fs.do_as_user('oozie', cls.cluster.fs.create_home_dir,
                                  '/user/oozie')
        cls.cluster.fs.do_as_user('oozie', cls.cluster.fs.copyFromLocal,
                                  OozieServerProvider.OOZIE_HOME + '/share',
                                  '/user/oozie/')

    @classmethod
    def _get_shared_oozie_server(cls):
        callback = lambda: None

        _oozie_lock.acquire()

        if not OozieServerProvider.is_oozie_running:
            LOG.info(
                '\nStarting a Mini Oozie. Requires "tools/jenkins/jenkins.sh" to have been run previously.\n'
            )
            LOG.info('See https://issues.cloudera.org/browse/HUE-861\n')

            finish = (OOZIE_URL.set_for_testing(
                "http://localhost:%s/oozie" %
                OozieServerProvider.OOZIE_TEST_PORT), )

            # Setup
            cluster = pseudo_hdfs4.shared_cluster()
            cls._setup_sharelib()
            cls._reset_oozie()

            p = cls._start_oozie(cluster)

            def kill():
                LOG.info("Killing Oozie server (pid %d)." % p.pid)
                os.kill(p.pid, 9)
                p.wait()

            atexit.register(kill)

            start = time.time()
            started = False
            sleep = 0.01

            while not started and time.time() - start < 30.0:
                status = None
                try:
                    LOG.info('Check Oozie status...')
                    status = get_oozie().get_oozie_status()
                    if status['systemMode'] == 'NORMAL':
                        started = True
                        break
                    time.sleep(sleep)
                    sleep *= 2
                except Exception, e:
                    LOG.info('Oozie server status not NORMAL yet: %s - %s' %
                             (status, e))
                    time.sleep(sleep)
                    sleep *= 2
                    pass
            if not started:
                raise Exception("Oozie server took too long to come up.")

            OozieServerProvider.is_oozie_running = True

            def shutdown():
                for f in finish:
                    f()
                cluster.stop()

            callback = shutdown

        _oozie_lock.release()

        return get_oozie(), callback
Example #24
 def _get_hdfs_bin(self, env):
   try:
     return env['HDFS_BIN']
   except KeyError:
     return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'hdfs')