class CustomModel:
    """Picklable wrapper around a Spark ML regression model.

    The wrapped model is not picklable, so ``__getstate__`` drops it from
    the pickled state; after unpickling, attribute lookup falls back to the
    class-level ``model`` until a real model is re-attached.
    """

    # Class-level fallback seen by unpickled instances before a model is
    # re-attached.  NOTE(review): constructing LinearRegressionModel() with
    # no underlying Java model looks suspicious — confirm this fallback is
    # ever usable.
    model = LinearRegressionModel()

    def __init__(self, pmodel):
        # pmodel: the fitted model instance to wrap.
        # (Removed a dead local assignment `model = pmodel` that had no
        # effect, and stale commented-out code.)
        self.model = pmodel

    def __getstate__(self):
        # Copy the instance state so the live object is never mutated,
        # then remove the unpicklable model reference.
        state = self.__dict__.copy()
        del state['model']
        return state

    def __setstate__(self, state):
        # Restore the picklable attributes; 'model' is intentionally absent
        # and resolves to the class attribute until reassigned.
        self.__dict__.update(state)

    def getModel(self):
        """Return the wrapped model (or the class-level fallback)."""
        return self.model
def test_lr_evaluate_invaild_type(self):
    """evaluate() must raise TypeError when handed a non-dataset argument."""
    model = LinearRegressionModel()
    with self.assertRaises(TypeError):
        model.evaluate("")
import pika
import sys
import json
import pyspark
import time
from pyspark.ml.regression import LinearRegressionModel
from pyspark.ml.feature import VectorAssembler
import pandas as pd
import multiprocessing
import threading

# Column order expected by the trained model / downstream HDFS dataset.
database_features_ordered = ['VendorID', 'tpep_pickup_datetime',
                             'tpep_dropoff_datetime', 'passenger_count',
                             'trip_distance', 'RatecodeID',
                             'store_and_fwd_flag', 'PULocationID',
                             'DOLocationID', 'payment_type', 'fare_amount',
                             'extra', 'mta_tax', 'tip_amount',
                             'tolls_amount', 'improvement_surcharge',
                             'total_amount']

# Executor cores must be configured *before* the session exists: mutating
# sparkContext._conf after getOrCreate() has no effect, so set it on the
# builder instead.
sc = (pyspark.sql.SparkSession.builder
      .appName("nycApp")
      .config('spark.executor.cores', multiprocessing.cpu_count())
      .getOrCreate())
print(sc.sparkContext._conf.getAll())

# load() is a loader classmethod — no throwaway instance required.
model_1 = LinearRegressionModel.load("/home/gcpkey/lr.model")

topic = "streaming_data"
# SECURITY(review): broker IP and credentials are hardcoded in source —
# move them to environment variables or a secrets store.
credentials = pika.PlainCredentials('user', 'QwwyqaQj1C4i')
parameters = pika.ConnectionParameters('35.247.117.124', 5672, '/', credentials)
connection = pika.BlockingConnection(parameters)
connection1 = pika.BlockingConnection(parameters)
channel = connection.channel()
channel1 = connection1.channel()
channel1.queue_declare(queue="receivePredictedFareClient1")
channel.queue_declare(queue=topic)


def callback(ch, method, properties, body):
    """RabbitMQ consumer callback.

    Parses one JSON taxi-trip message, reorders its columns to the model's
    expected layout, and appends the row to the HDFS CSV dataset.
    """
    df_message = pd.DataFrame.from_dict([json.loads(body.decode())])
    df_message = df_message[database_features_ordered]
    df_message_pyspark = sc.createDataFrame(df_message)
    # NOTE: with mode='append' this path is treated as a directory of
    # part-files, not a single CSV file.
    df_message_pyspark.write.csv("hdfs://cluster-9bfd-m/hadoop/data1.csv",
                                 header=True, mode='append')