def __init__(self):
    """
    Initialize webhdfs connection settings from the 'webhdfs'
    configuration section.

    Sets:
        port         -- webhdfs port from config, or None if not configured
        user         -- hdfs user name ('root' if not given), or None if not configured
        use_kerberos -- True only when the 'kerberos' config value is the string 'true'
    """
    env = Environment.create()
    config_context = env.get_config_items('webhdfs')
    # A usable webhdfs config must at least supply a port.
    if config_context is not None and 'port' in config_context:
        self.port = config_context['port']
        self.user = config_context.get('user', 'root')
        # BUG FIX: the original indexed config_context['kerberos'] directly,
        # raising KeyError when the key is absent, even though 'user' is
        # treated as optional.  Default to 'false' (kerberos disabled).
        self.use_kerberos = config_context.get('kerberos', 'false').lower() == 'true'
    else:
        # webhdfs not configured: leave everything unset/disabled.
        self.port = None
        self.user = None
        self.use_kerberos = False
def __init__(self):
    """
    Pull webhdfs settings out of the environment configuration.

    When the 'webhdfs' section is missing or has no 'port' entry, the
    connection attributes are left unset (port/user None, kerberos off).
    Otherwise port is taken from the config, user defaults to 'root',
    and kerberos is enabled when the 'kerberos' value equals 'true'.
    """
    cfg = Environment.create().get_config_items('webhdfs')
    if cfg is None or 'port' not in cfg:
        # No usable webhdfs configuration present.
        self.port = None
        self.user = None
        self.use_kerberos = False
        return
    self.port = cfg['port']
    self.user = cfg['user'] if 'user' in cfg else 'root'
    # NOTE: a configured section is expected to carry a 'kerberos' entry;
    # its (string) value selects kerberos authentication.
    self.use_kerberos = cfg['kerberos'].lower() == 'true'
def spark_cluster_mode():
    """
    Report whether spark runs in cluster mode.

    Returns
    -------
    out: boolean
        True if spark is running in cluster mode.  Cluster mode means that
        spark is running on a platform separate the program.  In practice,
        cluster mode means that file arguments must be located on a network
        filesystem such as HDFS or NFS.
    """
    environment = Environment.create()
    spark_conf = create_spark_config(environment)
    master = spark_conf.get("spark.master")
    # Any master other than local[*] implies a separate cluster.
    return not master.startswith("local")
def spark_cluster_mode():
    """
    Gets the cluster mode

    Returns
    -------
    out: boolean
        True if spark is running in cluster mode.  Cluster mode means that
        spark is running on a platform separate the program.  In practice,
        cluster mode means that file arguments must be located on a network
        filesystem such as HDFS or NFS.
    """
    # Local mode is indicated by a 'spark.master' value such as local[...];
    # everything else counts as a cluster deployment.
    is_local = create_spark_config(Environment.create()) \
        .get('spark.master').startswith('local')
    return not is_local
def __init__(self): """ Create a spark context. The spark configuration is taken from xframes/config.ini and from the values set in SparkInitContext.set() if this has been called. """ # This is placed here because otherwise it causes an error when used in a spark slave. from pyspark import SparkConf, SparkContext, SQLContext, HiveContext # This reads from default.ini and then xframes/config.ini # if they exist. self._env = Environment.create() context = create_spark_config(self._env) verbose = self._env.get_config('xframes', 'verbose', 'false').lower() == 'true' hdfs_user_name = self._env.get_config('webhdfs', 'user', 'hdfs') os.environ['HADOOP_USER_NAME'] = hdfs_user_name config_pairs = [(k, v) for k, v in context.iteritems()] self._config = (SparkConf().setAll(config_pairs)) if verbose: print 'Spark Config: {}'.format(config_pairs) self._sc = SparkContext(conf=self._config) self._sqlc = SQLContext(self._sc) self._hivec = HiveContext(self._sc) self.zip_path = [] version = [int(n) for n in self._sc.version.split('.')] self.status_tracker = self._sc.statusTracker() if cmp(version, [1, 4, 1]) >= 0: self.application_id = self._sc.applicationId else: self.application_id = None if verbose: print 'Spark Version: {}'.format(self._sc.version) if self.application_id: print 'Application Id: {}'.format(self.application_id) if not context['spark.master'].startswith('local'): zip_path = self.build_zip(get_xframes_home()) if zip_path: self._sc.addPyFile(zip_path) self.zip_path.append(zip_path) trace_flag = self._env.get_config('xframes', 'rdd-trace', 'false').lower() == 'true' XRdd.set_trace(trace_flag) atexit.register(self.close_context)
def __init__(self): """ Create a spark context. The spark configuration is taken from xframes/config.ini and from the values set in SparkInitContext.set() if this has been called. """ # This is placed here because otherwise it causes an error when used in a spark slave. from pyspark import SparkConf, SparkContext, SQLContext, HiveContext # This reads from default.ini and then xframes/config.ini # if they exist. self._env = Environment.create() context = create_spark_config(self._env) verbose = self._env.get_config("xframes", "verbose", "false").lower() == "true" hdfs_user_name = self._env.get_config("webhdfs", "user", "hdfs") os.environ["HADOOP_USER_NAME"] = hdfs_user_name config_pairs = [(k, v) for k, v in context.iteritems()] self._config = SparkConf().setAll(config_pairs) if verbose: print "Spark Config: {}".format(config_pairs) self._sc = SparkContext(conf=self._config) self._sqlc = SQLContext(self._sc) self._hivec = HiveContext(self._sc) self.zip_path = [] version = [int(n) for n in self._sc.version.split(".")] self.status_tracker = self._sc.statusTracker() if cmp(version, [1, 4, 1]) >= 0: self.application_id = self._sc.applicationId else: self.application_id = None if verbose: print "Spark Version: {}".format(self._sc.version) if self.application_id: print "Application Id: {}".format(self.application_id) if not context["spark.master"].startswith("local"): zip_path = self.build_zip(get_xframes_home()) if zip_path: self._sc.addPyFile(zip_path) self.zip_path.append(zip_path) trace_flag = self._env.get_config("xframes", "rdd-trace", "false").lower() == "true" XRdd.set_trace(trace_flag) atexit.register(self.close_context)