-
Notifications
You must be signed in to change notification settings - Fork 0
/
timer.py
37 lines (30 loc) · 1.16 KB
/
timer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import sys, csv, math, time
from StringIO import StringIO
from pyspark import SparkConf, SparkContext
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import LogisticRegressionWithSGD
from pyspark.mllib.evaluation import BinaryClassificationMetrics
APP_NAME = "My Spark Application"
def parsePoint(line):
    """Turn one text record into a ``LabeledPoint``.

    Every ``[``, ``]`` and space character is dropped from *line*; what is
    left is split on commas and each field is converted with ``int``.  The
    first integer becomes the label and the remaining integers become the
    feature values.
    """
    # Single pass that discards the bracket and space characters.
    stripped = "".join(ch for ch in line if ch not in "[] ")
    numbers = list(map(int, stripped.split(',')))
    label = numbers[0]
    features = numbers[1:]
    return LabeledPoint(label, features)
def main(sc):
    """Time the training of a logistic-regression model with SGD.

    Reads ``input/ctc_data.txt`` through *sc*, parses every line with
    ``parsePoint``, randomly splits the records with weights 0.2 / 0.8,
    trains ``LogisticRegressionWithSGD`` on one subset and prints the
    wall-clock seconds spent inside the ``train`` call.

    Args:
        sc: The SparkContext used to load the input file.
    """
    labeled_points = sc.textFile("input/ctc_data.txt").map(parsePoint)

    # randomSplit returns a list of RDDs (one per weight), while train()
    # expects a single RDD of LabeledPoint, so the split has to be unpacked
    # instead of being handed over as a whole.
    # NOTE(review): the first subset (weight 0.2) is used for training by
    # positional convention -- confirm this is the intended subset.
    train_split, _held_out = labeled_points.randomSplit(weights=[0.2, 0.8])

    start = time.time()
    LogisticRegressionWithSGD.train(train_split)
    end = time.time()

    time_elapsed = end - start
    output = "\nusing SGD " + str(time_elapsed)
    # Single-argument call form prints the same text under Python 2 and 3.
    print(output)
if __name__ == "__main__":
    # Spark configuration: application name plus the "local[*]" master,
    # assembled as one fluent chain.
    conf = SparkConf().setAppName(APP_NAME).setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # Run the timing job against the freshly created context.
    main(sc)