-
Notifications
You must be signed in to change notification settings - Fork 0
/
deneme2.py
27 lines (21 loc) · 826 Bytes
/
deneme2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
# Create a local StreamingContext with two working threads and a
# batch interval of 20 seconds.
# NOTE(review): the original comment said "2 seconds" but the code passes 20
# to StreamingContext — confirm which batch interval was intended.
sc = SparkContext("local[2]", "Sensor")
ssc = StreamingContext(sc, 20)
# Create a DStream reading newline-delimited text from a TCP socket on the
# sandbox host, port 3333.
# NOTE(review): ssc.start() is never called in this script, so `lines` is
# defined but the stream never runs — confirm whether that is intentional.
lines = ssc.socketTextStream("sandbox-hdp.hortonworks.com", 3333)
# Basic reduceByKey example in Python.
# Build a pair RDD of (key, value) tuples, distributed across 3 partitions.
xx = sc.parallelize(
    [
        ("a", 1), ("b", 1), ("a", 1), ("a", 1),
        ("b", 1), ("b", 1), ("b", 1), ("b", 1),
    ],
    3,
)
# Sum the values per key: reduceByKey merges values for each key locally
# before shuffling, using the supplied associative function.
y = xx.reduceByKey(lambda running_total, value: running_total + value)
print(y.collect())
# [('b', 5), ('a', 3)]
# The same associative reducer, written as a named function instead of a lambda.
def sumFunc(accum, n):
    """Return the running total *accum* plus the next value *n*.

    Associative and commutative, so it is safe to pass to reduceByKey.
    """
    total = accum + n
    return total
# Same aggregation as above, but with the named function; the result
# is identical to the lambda version.
y = xx.reduceByKey(sumFunc)
print(y.collect())
# [('b', 5), ('a', 3)]