def init():
    os.environ["CLASSPATH"] = "%s:%s:%s" % (
        pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
    os.environ["LIBHDFS_OPTS"] = os.getenv(
        "LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS
    ) + " -Djava.library.path=%s" % pydoop.hadoop_native()

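# Hedged usage sketch for the init() above: it only mutates the environment,
# so its assumed effect can be inspected directly.  It presumes the enclosing
# module defines _ORIG_CLASSPATH and common.DEFAULT_LIBHDFS_OPTS and that a
# Hadoop installation is visible to pydoop.
init()  # must run before the first libhdfs call, so the embedded JVM sees it

# CLASSPATH now starts with the Hadoop jars and ends with the conf directory
for entry in os.environ["CLASSPATH"].split(":")[:5]:
    print(entry)

# LIBHDFS_OPTS carries the default JVM options plus -Djava.library.path=...
print(os.environ["LIBHDFS_OPTS"])
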
def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    ``properties`` is passed to :func:`run_cmd`.

    If ``offline`` is True, try getting the list of task trackers from
    the 'slaves' file in Hadoop's configuration directory (no attempt is
    made to contact the Hadoop daemons).  In this case, ports are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        stdout = run_cmd("job", ["-list-active-trackers"],
                         properties=properties, hadoop_conf_dir=hadoop_conf_dir)
        task_trackers = []
        for l in stdout.splitlines():
            if not l:
                continue
            l = l.split(":")
            task_trackers.append((l[0].split("_")[1], int(l[-1])))
    return task_trackers

def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    All arguments are passed to :func:`run_class`.

    If ``offline`` is :obj:`True`, try getting the list of task trackers
    from the ``slaves`` file in Hadoop's configuration directory (no
    attempt is made to contact the Hadoop daemons).  In this case, ports
    are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        # run JobClient directly (avoids "hadoop job" deprecation)
        stdout = run_class(
            "org.apache.hadoop.mapred.JobClient", ["-list-active-trackers"],
            properties=properties, hadoop_conf_dir=hadoop_conf_dir,
            keep_streams=True
        )
        task_trackers = []
        for line in stdout.splitlines():
            if not line:
                continue
            line = line.split(":")
            task_trackers.append((line[0].split("_")[1], int(line[-1])))
    return task_trackers

def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    All arguments are passed to :func:`run_class`.

    If ``offline`` is :obj:`True`, try getting the list of task trackers
    from the ``slaves`` file in Hadoop's configuration directory (no
    attempt is made to contact the Hadoop daemons).  In this case, ports
    are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        # run JobClient directly (avoids "hadoop job" deprecation)
        stdout = run_class("org.apache.hadoop.mapred.JobClient",
                           ["-list-active-trackers"], properties=properties,
                           hadoop_conf_dir=hadoop_conf_dir, keep_streams=True)
        task_trackers = []
        for l in stdout.splitlines():
            if not l:
                continue
            l = l.split(":")
            task_trackers.append((l[0].split("_")[1], int(l[-1])))
    return task_trackers

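# Hedged usage sketch for get_task_trackers() as defined above (any variant).
# Output values are illustrative; the online query needs a running cluster.
for host, port in get_task_trackers():
    print("active tracker: %s:%d" % (host, port))

# offline=True only reads <hadoop_conf_dir>/slaves, so every port comes back as 0
for host, _ in get_task_trackers(offline=True):
    print("configured slave: %s" % host)
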
def init():
    os.environ["CLASSPATH"] = "%s:%s:%s" % (
        pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
    os.environ["LIBHDFS_OPTS"] = os.getenv(
        "LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS
    )

def test_conf(self):
    os.environ['HADOOP_CONF_DIR'] = self.wd
    # silence Hadoop 3 warning
    with open(os.path.join(self.wd, 'log4j.properties'), 'w'):
        pass
    reload(pydoop)
    self.assertEqual(pydoop.hadoop_conf(), self.wd)

import sys
import os
import random
import uuid
import tempfile
import imp
import unittest
import shutil
import warnings

import pydoop

_HADOOP_HOME = pydoop.hadoop_home()
_HADOOP_CONF_DIR = pydoop.hadoop_conf()
_RANDOM_DATA_SIZE = 32
_DEFAULT_HDFS_HOST = "localhost"
_DEFAULT_HDFS_PORT = 8020 if pydoop.is_cloudera() else 9000
_DEFAULT_BYTES_PER_CHECKSUM = 512
HDFS_HOST = os.getenv("HDFS_HOST", _DEFAULT_HDFS_HOST)
HDFS_PORT = os.getenv("HDFS_PORT", _DEFAULT_HDFS_PORT)


def _get_special_chr():
    """
    This is used to check unicode support.  On some systems, depending on
    locale settings, we won't be able to use non-ASCII characters when
    interacting with system calls.  Since in such cases it doesn't really
    make sense to run these tests we set UNI_CHR to a regular ASCII
    character.

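# The snippet above is truncated after the docstring; what follows is a
# minimal sketch of the kind of locale check it describes, NOT the project's
# actual implementation.  The probe character and the encoding lookup are
# assumptions.
def _get_special_chr_sketch():
    special = u'\u20ac'  # euro sign, used here only as a non-ASCII probe
    encoding = sys.getfilesystemencoding() or 'ascii'
    try:
        special.encode(encoding)
    except UnicodeEncodeError:
        # locale cannot represent non-ASCII names: fall back to plain ASCII
        special = u'x'
    return special
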
def test_conf(self):
    if os.environ.has_key('HADOOP_CONF_DIR'):
        self.assertEqual(os.environ['HADOOP_CONF_DIR'], pydoop.hadoop_conf())

def test_conf(self):
    os.environ['HADOOP_CONF_DIR'] = self.wd
    reload(pydoop)
    self.assertEqual(pydoop.hadoop_conf(), self.wd)

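# The test_conf() variants above assume a TestCase that prepares a scratch
# conf directory in self.wd and restores the environment afterwards.  A
# minimal sketch of such scaffolding follows (assuming the os/tempfile/shutil/
# unittest imports shown earlier); names are illustrative, not the project's
# actual test harness.
class HadoopConfTestSketch(unittest.TestCase):

    def setUp(self):
        self.wd = tempfile.mkdtemp(prefix="pydoop_")
        self._old_conf_dir = os.environ.get("HADOOP_CONF_DIR")

    def tearDown(self):
        shutil.rmtree(self.wd)
        if self._old_conf_dir is None:
            os.environ.pop("HADOOP_CONF_DIR", None)
        else:
            os.environ["HADOOP_CONF_DIR"] = self._old_conf_dir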