Example #1
    def setUp(self):
        # Initialize all CaffeOnSpark objects shared by the tests.
        self.cos = CaffeOnSpark(sc)
        # Command-line arguments arrive as a single string via the Spark
        # configuration, e.g. "-conf lenet_solver.prototxt -devices 1".
        cmdargs = conf.get('spark.pythonargs')
        # Pair the flat token list into a {flag: value} dict.
        self.args = dict(self.grouper(cmdargs.split(), 2))
        self.cfg = Config(sc, self.args)
        # getSource(cfg, True) returns the training source,
        # getSource(cfg, False) the validation source.
        self.train_source = DataSource(sc).getSource(self.cfg, True)
        self.validation_source = DataSource(sc).getSource(self.cfg, False)
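This snippet references a grouper helper on the test class that is not shown. A minimal sketch of the surrounding test case, assuming the same grouper recipe used in Example #3 (the class name here is hypothetical):

import unittest
from itertools import izip_longest

class CaffeOnSparkTestCase(unittest.TestCase):  # hypothetical name

    @staticmethod
    def grouper(iterable, n, fillvalue=None):
        # itertools recipe: collect data into fixed-length chunks,
        # e.g. grouper('ABCDEF', 2) -> ('A','B'), ('C','D'), ('E','F')
        args = [iter(iterable)] * n
        return izip_longest(fillvalue=fillvalue, *args)

    # ... the setUp method shown above goes here, along with the tests.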
Example #2
"""
This function calls CaffeOnSpark to train the model.  It is similar in 
structure to the LeNext example, e.g., see
https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_python
In fact, the Python interface for CaffeOnSpark currently (July 2016)
allows for very little deviation from this format. 
"""

import os
import inspect

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from com.yahoo.ml.caffe.CaffeOnSpark import CaffeOnSpark
from com.yahoo.ml.caffe.Config import Config
from com.yahoo.ml.caffe.RegisterContext import registerContext, registerSQLContext

if __name__ == '__main__':

    sparkConf = SparkConf().setAppName("BeijingTomorrow").setMaster("local")
    sc = SparkContext(conf=sparkConf)
    registerContext(sc)
    sqlContext = SQLContext(sc)
    registerSQLContext(sqlContext)
    cos = CaffeOnSpark(sc, sqlContext)
    cfg = Config(sc)
    # Resolve the project root: two directories above the one containing
    # this file.
    this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
    project_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
    visualProtoFile = os.path.join(
        project_dir,
        "resources/caffe_prototxt/beijing_pollution_solver_visual.prototxt")
    visualModelFile = os.path.join(
        project_dir,
        "resources/caffe_models/beijing_pollution_model_visual.model")
    aerosolProtoFile = os.path.join(
        project_dir,
        "resources/caffe_prototxt/beijing_pollution_solver_aerosol.prototxt")
    aerosolModelFile = os.path.join(
        project_dir,
        "resources/caffe_models/beijing_pollution_model_aerosol.model")
Example #3
from itertools import izip_longest

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from com.yahoo.ml.caffe.CaffeOnSpark import CaffeOnSpark
from com.yahoo.ml.caffe.Config import Config
from com.yahoo.ml.caffe.DataSource import DataSource


def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks: grouper('ABCDEF', 2) -> AB CD EF."""
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)
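# How grouper is used below: it pairs a flat list of command-line
# tokens into (flag, value) tuples, so
#   dict(grouper('-conf solver.prototxt -devices 1'.split(), 2))
# yields {'-conf': 'solver.prototxt', '-devices': '1'}.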


conf = SparkConf()
sc = SparkContext(conf=conf)
# Initialize all objects
cos = CaffeOnSpark(sc)
# Command-line arguments arrive as a single string via the Spark configuration.
cmdargs = conf.get('spark.pythonargs')
args = dict(grouper(cmdargs.split(), 2))
cfg = Config(sc, args)
dl_train_source = DataSource(sc).getSource(cfg, True)
# Train the network on the training source
cos.train(dl_train_source)
lr_raw_source = DataSource(sc).getSource(cfg, False)
# Extract features from the trained network
extracted_df = cos.features(lr_raw_source)
# Do multiclass LogisticRegression
data = extracted_df.map(
    lambda row: LabeledPoint(row.label[0], Vectors.dense(row.ip1)))
lr = LogisticRegressionWithLBFGS.train(data, numClasses=10, iterations=10)
predictions = lr.predict(data.map(lambda pt: pt.features))
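The example stops at prediction. A minimal follow-up sketch (not part of the original; standard MLlib practice) for estimating training accuracy by comparing the predictions against the true labels:

# Zip true labels with predictions and count matches.
labels_and_preds = data.map(lambda pt: pt.label).zip(predictions)
num_correct = labels_and_preds.filter(lambda lp: lp[0] == lp[1]).count()
print('training accuracy: %.3f' % (float(num_correct) / data.count()))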