示例#1
0
""" wordcount example using the rdd api, we'll write a test for this """
from __future__ import print_function

import sys

from pyspark import SparkContext
from pyspark import HiveContext


def do_json_counts(df, target_name):
    """Return how many rows of *df* have ``name`` equal to *target_name*.

    Args:
        df: a DataFrame exposing a ``name`` column.
        target_name: the value to match against the ``name`` column.

    Returns:
        The number of matching rows, as an int.
    """
    matching = df.filter(df.name == target_name)
    return matching.count()


if __name__ == "__main__":

    # Expect exactly one argument: the path to the JSON input file.
    if len(sys.argv) != 2:
        sys.exit("Usage: json_count.py <json file>")

    sc = SparkContext(appName="PythonJsonCount")
    # NOTE(review): HiveContext is imported from ``pyspark`` at the top of the
    # file; it lives in ``pyspark.sql`` in released PySpark versions — confirm
    # the import against the project's PySpark version.
    hc = HiveContext.getOrCreate(sc)
    # ``DataFrameReader.json`` takes the path (schema is optional); the
    # original stray positional ``1`` would be interpreted as a schema and is
    # not valid, so it is dropped.
    df = hc.read.json(sys.argv[1])

    print("Name vikas found %d times" % do_json_counts(df, 'vikas'))
示例#2
0
from valida_data_ref_carga import verifica_data_ref_carga

# ## Spark context definitions (notebook cells exported to a script)

# In[4]:

# Application name plus a local master with 2 worker threads.
conf = SparkConf().setAppName("Test_movto_validos").setMaster("local[2]")

# In[5]:

# Build (or reuse) a Hive-enabled SparkSession from the config above.
spark = SparkSession.builder.enableHiveSupport().config(
    conf=conf).getOrCreate()

# In[6]:

# NOTE(review): HiveContext.getOrCreate normally expects a SparkContext, not
# a SparkSession — confirm this call against the PySpark version in use.
spark_hive = HiveContext.getOrCreate(spark)

# In[7]:

# remove when deploying
#if sc.startTime != 0:    sc.stop()

# In[8]:

# Reuse the SparkContext created implicitly by the session above.
sc = SparkContext.getOrCreate()

# In[9]:

# Legacy SQLContext wrapper around the same SparkContext.
sql_ctx = SQLContext(sc)

# In[10]:
示例#3
0
""" wordcount example using the rdd api, we'll write a test for this """
from __future__ import print_function

import sys

from pyspark import SparkContext
from pyspark import HiveContext


def do_json_counts(df, target_name):
    """Count the records of *df* whose ``name`` column equals *target_name*.

    Args:
        df: a DataFrame with a ``name`` column.
        target_name: value compared against each row's ``name``.

    Returns:
        int: number of rows where ``name == target_name``.
    """
    name_matches = df.name == target_name
    return df.filter(name_matches).count()


if __name__ == "__main__":

    # Expect exactly one argument: the path to the JSON input file.
    if len(sys.argv) != 2:
        sys.exit("Usage: json_count.py <json file>")

    sc = SparkContext(appName="PythonJsonCount")
    # NOTE(review): HiveContext is imported from ``pyspark`` at the top of the
    # file; it lives in ``pyspark.sql`` in released PySpark versions — confirm
    # the import against the project's PySpark version.
    hc = HiveContext.getOrCreate(sc)
    # ``DataFrameReader.json`` takes the path (schema is optional); the
    # original stray positional ``1`` would be interpreted as a schema and is
    # not valid, so it is dropped.
    df = hc.read.json(sys.argv[1])

    print("Name vikas found %d times" % do_json_counts(df, 'vikas'))