Example #1
class CustomModel:
    """Picklable wrapper around a (non-picklable) Spark LinearRegressionModel."""

    def __init__(self, pmodel):
        self.model = pmodel

    def __getstate__(self):
        # Copy the object's state from self.__dict__, which contains
        # all our instance attributes. Always use the dict.copy()
        # method to avoid modifying the original state.
        state = self.__dict__.copy()
        # Remove the unpicklable Spark model.
        del state['model']
        return state

    def __setstate__(self, state):
        # Restore the picklable instance attributes. The Spark model was
        # excluded from the pickle, so leave a placeholder; it must be
        # reloaded separately after unpickling.
        self.__dict__.update(state)
        self.model = None

    def getModel(self):
        return self.model
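
Because __getstate__ drops the Spark model, a pickle round trip restores only the plain attributes and the wrapper comes back with model set to None; the model has to be reattached by hand, e.g. by reloading it from disk. A minimal usage sketch (the model path here is hypothetical):

import pickle
from pyspark.ml.regression import LinearRegressionModel

# Hypothetical path; any saved LinearRegressionModel would do.
wrapper = CustomModel(LinearRegressionModel.load("/tmp/lr.model"))
payload = pickle.dumps(wrapper)   # succeeds: the Spark model is excluded

restored = pickle.loads(payload)  # restored.getModel() is None at this point
restored.model = LinearRegressionModel.load("/tmp/lr.model")  # reattach manually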
Example #2
    def test_lr_evaluate_invalid_type(self):
        lr = LinearRegressionModel()
        invalid_type = ""
        self.assertRaises(TypeError, lr.evaluate, invalid_type)
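
The method above presumably lives inside a unittest.TestCase subclass; a self-contained version might look like the following (the class name is an assumption, not from the original):

import unittest
from pyspark.ml.regression import LinearRegressionModel

class LinearRegressionModelTest(unittest.TestCase):  # hypothetical wrapper class

    def test_lr_evaluate_invalid_type(self):
        lr = LinearRegressionModel()
        invalid_type = ""
        # evaluate() expects a DataFrame, so a plain string should raise TypeError.
        self.assertRaises(TypeError, lr.evaluate, invalid_type)

if __name__ == '__main__':
    unittest.main()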
import pika
import sys
import json
import pyspark
import time
from pyspark.ml.regression import LinearRegressionModel
from pyspark.ml.feature import VectorAssembler
import pandas as pd
import multiprocessing
import threading

database_features_ordered = [
    'VendorID', 'tpep_pickup_datetime', 'tpep_dropoff_datetime',
    'passenger_count', 'trip_distance', 'RatecodeID', 'store_and_fwd_flag',
    'PULocationID', 'DOLocationID', 'payment_type', 'fare_amount', 'extra',
    'mta_tax', 'tip_amount', 'tolls_amount', 'improvement_surcharge',
    'total_amount'
]
# Executor settings must be supplied before the session is created; setting
# them on the private _conf object after getOrCreate() has no effect.
sc = pyspark.sql.SparkSession.builder \
    .appName("nycApp") \
    .config('spark.executor.cores', multiprocessing.cpu_count()) \
    .getOrCreate()
print(sc.sparkContext.getConf().getAll())
# load() is a classmethod, so no throwaway instance is needed.
model_1 = LinearRegressionModel.load("/home/gcpkey/lr.model")
topic = "streaming_data"
credentials = pika.PlainCredentials('user', 'QwwyqaQj1C4i')
parameters = pika.ConnectionParameters('35.247.117.124', 5672, '/', credentials)
# One connection/channel pair consumes the incoming stream; a second one
# publishes predicted fares back to the client.
connection = pika.BlockingConnection(parameters)
connection1 = pika.BlockingConnection(parameters)
channel = connection.channel()
channel1 = connection1.channel()
channel1.queue_declare(queue="receivePredictedFareClient1")
channel.queue_declare(queue=topic)
def callback(ch, method, properties, body):
    # Each message is a JSON-encoded trip record; wrap it in a one-row frame
    # and reorder the columns to match the database schema.
    df_message = pd.DataFrame.from_dict([json.loads(body.decode())])
    df_message = df_message[database_features_ordered]
    df_message_pyspark = sc.createDataFrame(df_message)
    # Archive the raw record to HDFS (Spark writes a directory of part files).
    df_message_pyspark.write.csv("hdfs://cluster-9bfd-m/hadoop/data1.csv", header=True, mode='append')
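
The script breaks off before the loaded model is applied. A minimal sketch of how the prediction and reply might be wired up, plus the consume loop, assuming the saved model was trained on a 'features' vector assembled from the numeric trip columns (the feature list, helper name, and payload shape below are assumptions, not from the original):

# Hypothetical continuation -- feature columns and message format are
# assumptions, not taken from the original script.
feature_cols = ['passenger_count', 'trip_distance', 'PULocationID', 'DOLocationID']
assembler = VectorAssembler(inputCols=feature_cols, outputCol='features')

def predict_and_publish(df_message_pyspark):
    features_df = assembler.transform(df_message_pyspark)
    predicted_fare = model_1.transform(features_df).select('prediction').first()[0]
    # Send the predicted fare back to the client on the second channel.
    channel1.basic_publish(exchange='',
                           routing_key='receivePredictedFareClient1',
                           body=json.dumps({'predicted_fare': predicted_fare}))

channel.basic_consume(queue=topic, on_message_callback=callback, auto_ack=True)
channel.start_consuming()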