示例#1
0
""" wordcount example using the rdd api, we'll write a test for this """
from __future__ import print_function

import sys

from pyspark import SparkContext
from pyspark import HiveContext


def do_json_counts(df, target_name):
    """Return how many rows of *df* have ``name`` equal to *target_name*.

    Args:
        df: a DataFrame exposing a ``name`` column.
        target_name: the value to match against the ``name`` column.

    Returns:
        The number of matching rows, as an int.
    """
    matching = df.filter(df.name == target_name)
    return matching.count()


if __name__ == "__main__":

    # Expect exactly one argument: the path to the JSON input file.
    if len(sys.argv) != 2:
        sys.exit("Usage: json_count.py <json file>")

    sc = SparkContext(appName="PythonJsonCount")
    # NOTE(review): HiveContext is imported from ``pyspark`` at the top of the
    # file; it lives in ``pyspark.sql`` in released PySpark versions — confirm
    # the import against the project's PySpark version.
    hc = HiveContext.getOrCreate(sc)
    # ``DataFrameReader.json`` takes the path (schema is optional); the
    # original stray positional ``1`` would be interpreted as a schema and is
    # not valid, so it is dropped.
    df = hc.read.json(sys.argv[1])

    print("Name vikas found %d times" % do_json_counts(df, 'vikas'))
示例#2
0
from valida_data_ref_carga import verifica_data_ref_carga

# ## Spark context definitions (notebook cells exported to a script)

# In[4]:

# Application name plus a local master with 2 worker threads.
conf = SparkConf().setAppName("Test_movto_validos").setMaster("local[2]")

# In[5]:

# Build (or reuse) a Hive-enabled SparkSession from the config above.
spark = SparkSession.builder.enableHiveSupport().config(
    conf=conf).getOrCreate()

# In[6]:

# NOTE(review): HiveContext.getOrCreate normally expects a SparkContext, not
# a SparkSession — confirm this call against the PySpark version in use.
spark_hive = HiveContext.getOrCreate(spark)

# In[7]:

# remove when deploying
#if sc.startTime != 0:    sc.stop()

# In[8]:

# Reuse the SparkContext created implicitly by the session above.
sc = SparkContext.getOrCreate()

# In[9]:

# Legacy SQLContext wrapper around the same SparkContext.
sql_ctx = SQLContext(sc)

# In[10]:
示例#3
0
""" wordcount example using the rdd api, we'll write a test for this """
from __future__ import print_function

import sys

from pyspark import SparkContext
from pyspark import HiveContext


def do_json_counts(df, target_name):
    """Count the records of *df* whose ``name`` column equals *target_name*.

    Args:
        df: a DataFrame with a ``name`` column.
        target_name: value compared against each row's ``name``.

    Returns:
        int: number of rows where ``name == target_name``.
    """
    name_matches = df.name == target_name
    return df.filter(name_matches).count()


if __name__ == "__main__":

    # Expect exactly one argument: the path to the JSON input file.
    if len(sys.argv) != 2:
        sys.exit("Usage: json_count.py <json file>")

    sc = SparkContext(appName="PythonJsonCount")
    # NOTE(review): HiveContext is imported from ``pyspark`` at the top of the
    # file; it lives in ``pyspark.sql`` in released PySpark versions — confirm
    # the import against the project's PySpark version.
    hc = HiveContext.getOrCreate(sc)
    # ``DataFrameReader.json`` takes the path (schema is optional); the
    # original stray positional ``1`` would be interpreted as a schema and is
    # not valid, so it is dropped.
    df = hc.read.json(sys.argv[1])

    print("Name vikas found %d times" % do_json_counts(df, 'vikas'))