def setup_method(self, method):
    """Set up any state tied to the execution of the given method in a
    class. setup_method is invoked for every test method of a class.
    """
    sparkConf = init_spark_conf().setMaster("local[4]").setAppName(
        "test feature set")
    self.sc = init_nncontext(sparkConf)
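
These snippets rely on the Analytics Zoo context helpers; below is a minimal sketch of the imports a standalone copy of this fixture would need. The zoo.common.nncontext module path is an assumption about the distribution in use and may differ elsewhere (e.g. BigDL):

    # Assumed imports for running the fixture above on its own; the
    # module path follows the Analytics Zoo convention and is an
    # assumption, not part of the original snippet.
    from zoo.common.nncontext import init_spark_conf, init_nncontext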
Example #2
def create_sc(self, submit_args, conf):
    # pyspark-shell must be the last token in PYSPARK_SUBMIT_ARGS.
    submit_args = submit_args + " pyspark-shell"
    os.environ["PYSPARK_SUBMIT_ARGS"] = submit_args
    spark_conf = init_spark_conf(conf)
    sc = init_nncontext(conf=spark_conf, spark_log_level=self.spark_log_level,
                        redirect_spark_log=self.redirect_spark_log)
    return sc
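
A hedged usage sketch for create_sc: PYSPARK_SUBMIT_ARGS is only read when the JVM starts, so this method has to run before any SparkContext exists. The runner object and the argument values below are illustrative assumptions, not taken from the source:

    # Hypothetical call site; 'runner', the submit args, and the conf
    # dict are illustrative assumptions.
    submit_args = "--driver-memory 2g --executor-memory 2g"
    conf = {"spark.driver.maxResultSize": "1g"}
    sc = runner.create_sc(submit_args, conf)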
Example #3
def setup_method(self, method):
    """Set up any state tied to the execution of the given method in a
    class. setup_method is invoked for every test method of a class.
    """
    sparkConf = init_spark_conf().setMaster("local[1]").setAppName("testEstimator")
    self.sc = init_nncontext(sparkConf)
    self.sqlContext = SQLContext(self.sc)
    assert self.sc.appName == "testEstimator"
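
Note that SQLContext is a legacy entry point; on Spark 2.x and later the same DataFrame functionality is reachable through a SparkSession built on the existing context. A minimal sketch of the modern equivalent (standard PySpark API; its use here is an assumption, not from the source):

    # Assumes Spark >= 2.0; getOrCreate() reuses the context created
    # by init_nncontext rather than starting a new one.
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a")], ["id", "label"])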
Example #4
def init_spark_on_local(self, cores, conf=None, python_location=None):
    print("Start to getOrCreate SparkContext")
    # Point the workers at the right Python before the context starts.
    if "PYSPARK_PYTHON" not in os.environ:
        os.environ["PYSPARK_PYTHON"] = \
            python_location if python_location else detect_python_location()
    master = "local[{}]".format(cores)
    zoo_conf = init_spark_conf(conf).setMaster(master)
    sc = init_nncontext(conf=zoo_conf, spark_log_level=self.spark_log_level,
                        redirect_spark_log=self.redirect_spark_log)
    print("Successfully got a SparkContext")
    return sc
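
A hedged usage sketch: init_spark_on_local yields a local-mode SparkContext sized by cores. The runner object and the conf values are illustrative assumptions:

    # Hypothetical call site; 'runner' and the conf values are
    # illustrative, not from the source.
    sc = runner.init_spark_on_local(cores=4,
                                    conf={"spark.driver.memory": "4g"})
    print(sc.parallelize(range(10)).count())  # smoke test: should print 10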
Example #5
    print(args)
    app_name = args.app_name
    data_source_path = args.data_source_path
    model_file_name = app_name + '.h5'
    # Plain concatenation: assumes args.model_dir ends with a path separator.
    save_model_dir = args.model_dir + model_file_name
    u_limit = int(args.u_limit)
    m_limit = int(args.m_limit)
    neg_rate = int(args.neg_rate)
    sliding_length = int(args.sliding_length)
    u_output = int(args.u_output)
    m_output = int(args.m_output)
    max_epoch = int(args.max_epoch)
    batch_size = int(args.batch_size)
    predict_output_path = args.inference_output_path

    sparkConf = init_spark_conf()
    sc = init_nncontext(sparkConf)
    spark = SparkSession \
        .builder \
        .appName(app_name) \
        .getOrCreate()
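    # Note: init_nncontext above already created a SparkContext, so
    # getOrCreate() reuses it; the appName set on the builder only
    # takes effect when no context exists yet.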

    start = time.time()
    uDF, mDF, tDF = ncf_features.load_csv(spark, data_source_path, u_limit,
                                          m_limit)
    trainingDF = ncf_features.genData(tDF, sc, spark, args.train_start,
                                      args.train_end, neg_rate, sliding_length,
                                      u_limit, m_limit)
    #trainingDF.show(5)
    validationDF = ncf_features.genData(tDF, sc, spark, args.validation_start,
                                        args.validation_end, neg_rate,