from pyspark.mllib.classification import SVMWithSGD
from pyspark import SparkContext
from pyspark import SparkConf
from convert import *
from reduce_dimension import *

# Spark application / cluster configuration.
app_name = "WordCount"
spark_master = "spark://Kingdom:7077"
spark_home = "../spark-1.3.1-bin-hadoop2.4"

conf = SparkConf()
conf.setMaster(spark_master)
conf.setSparkHome(spark_home)
conf.setAppName(app_name)
conf.set("spark.executor.memory", "1g")
# Previously-tried tuning knobs, kept for reference:
#conf.set("spark.akka.frameSize", "100")
#conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
#conf.set("spark.kryoserializer.buffer.mb", "64")
#conf.set("spark.executor.extraJavaOptions", "-XX:+UseCompressedOops")
#conf.set("spark.storage.memoryFraction", "0.6")

# Ship the driver-side Python modules to the executors so the lambdas
# and helpers defined in them can be deserialized there.
sc = SparkContext(conf=conf, pyFiles=["main.py", "convert.py", "attributes.py"])


# Return the sorted label, and the weight list
def get_sorted_label(training_data):
    """Count labels in *training_data* and sort the (label, count) pairs.

    Parameters
    ----------
    training_data : RDD of LabeledPoint-like records (each has a ``.label``).

    NOTE(review): this definition appears truncated in this chunk — no
    ``return`` statement is visible; presumably the remainder (building the
    weight list from ``items``) follows elsewhere in the file. Confirm
    before relying on this docstring's summary.
    """
    # Extract each record's label as an int.
    rdd = training_data.map(lambda x: int(x.label))
    # countByValue() -> {label: count}; sort pairs by label ascending.
    items = sorted(rdd.countByValue().items(), key=lambda x: x[0])
from collections import Counter
from pyspark.mllib.classification import SVMWithSGD
from pyspark import SparkContext
from pyspark import SparkConf
from convert import *
from reduce_dimension import *

# Spark application / cluster configuration.
app_name = "WordCount"
spark_master = "spark://Kingdom:7077"
spark_home = "../spark-1.3.1-bin-hadoop2.4"

conf = SparkConf()
conf.setMaster(spark_master)
conf.setSparkHome(spark_home)
conf.setAppName(app_name)
conf.set("spark.executor.memory", "1g")
# Previously-tried tuning knobs, kept for reference:
#conf.set("spark.akka.frameSize", "100")
#conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
#conf.set("spark.kryoserializer.buffer.mb", "64")
#conf.set("spark.executor.extraJavaOptions", "-XX:+UseCompressedOops")
#conf.set("spark.storage.memoryFraction", "0.6")

# Ship the driver-side Python modules to the executors so the lambdas
# and helpers defined in them can be deserialized there.
sc = SparkContext(conf=conf, pyFiles=["main.py", "convert.py", "attributes.py"])


# Return the sorted label, and the weight list
def get_sorted_label(training_data):
    """Count labels in *training_data* and sort the (label, count) pairs.

    Parameters
    ----------
    training_data : RDD of LabeledPoint-like records (each has a ``.label``).

    NOTE(review): this definition appears truncated in this chunk — no
    ``return`` statement is visible; presumably the remainder (building the
    weight list from ``items``) follows elsewhere in the file. Confirm
    before relying on this docstring's summary.
    """
    # Extract each record's label as an int.
    rdd = training_data.map(lambda x: int(x.label))
    # countByValue() -> {label: count}; sort pairs by label ascending.
    items = sorted(rdd.countByValue().items(), key=lambda x: x[0])