def model_predict(query, data=None, model=None, test=False):
    """
    Predict for the date described by ``query`` and log the result.

    Parameters
    ----------
    query : mapping
        Must provide 'country', 'year', 'month' and 'day'.
    data : mapping
        Must provide 'dates' (array of 'YYYY-MM-DD' strings) and 'X'
        (a DataFrame with one row per date).
    model : object
        Fitted estimator exposing ``predict``; ``predict_proba`` is used
        only when the model also has a truthy ``probability`` attribute.
    test : bool
        Forwarded unchanged to ``update_predict_log``.

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': ...}``; 'y_proba' is None when no
        probabilities were computed.

    Raises
    ------
    Exception
        If data/model are missing, a date part contains a non-digit, the
        date is not in ``data['dates']``, or 'dates' and 'X' differ in length.
    """
    country = query['country']
    year = query['year']
    month = query['month']
    day = query['day']

    # start timer for runtime
    time_start = time.time()

    if data is None or model is None:
        raise Exception("ERROR (model_predict) - data or model missing")

    # str() lets integer date parts through; the original only accepted strings
    for part in (year, month, day):
        if re.search(r"\D", str(part)):
            raise Exception(
                "ERROR (model_predict) - invalid year, month or day")

    target_date = "{}-{}-{}".format(year, str(month).zfill(2),
                                    str(day).zfill(2))

    # sanity check: done before 'X' is indexed by a position found in 'dates'
    if data['dates'].shape[0] != data['X'].shape[0]:
        raise Exception("ERROR (model_predict) - dimensions mismatch")

    if target_date not in data['dates']:
        raise Exception(
            "ERROR (model_predict) - date {} not in range {}-{}".format(
                target_date, data['dates'][0], data['dates'][-1]))

    date_indx = np.where(data['dates'] == target_date)[0][0]
    features = data['X'].iloc[[date_indx]]

    # make prediction and gather data for log entry
    y_pred = model.predict(features)
    y_proba = None
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(features)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    # update predict log
    update_predict_log(country, y_pred, y_proba, target_date, runtime,
                       MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def model_predict(query, model=None, test=False):
    """
    Predict for every row of ``query`` and log one entry per row.

    Parameters
    ----------
    query : dict, DataFrame or anything ``pd.DataFrame`` can consume
        Input features; non-DataFrame input is converted.
    model : object, optional
        Estimator exposing ``predict``; loaded with ``model_load()`` when
        falsy.
    test : bool
        Forwarded unchanged to ``update_predict_log``.

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': 'None'}`` (probabilities are not
        computed here; the string 'None' is what gets logged and returned).

    Raises
    ------
    Exception
        If ``query`` cannot be turned into a DataFrame.
    """
    ## start timer for runtime
    time_start = time.time()

    ## input checks
    # Everything the unconditional conversion used to accept still works,
    # but a failed conversion now surfaces the intended error message.
    if not isinstance(query, pd.DataFrame):
        try:
            query = pd.DataFrame(query)
        except (ValueError, TypeError) as err:
            raise Exception(
                "ERROR (model_predict) - invalid input. {} was given".format(
                    type(query))) from err

    ## load model if needed
    if not model:
        model = model_load()

    print(query)

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = 'None'

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update the log file, one entry per query row
    for i, (_, row) in enumerate(query.iterrows()):
        update_predict_log(y_pred[i], y_proba, row.values.tolist(), runtime,
                           MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def test_log_prediction(self):
    """
    Write one entry through update_predict_log and check the test log exists
    """
    expected_path = os.path.join("logs", "predict-test.log")

    # arbitrary entry: prediction, runtime, model version, model note, test flag
    entry = (100, "00:00:02", 0.1, "Prophet", True)
    update_predict_log(*entry)

    self.assertTrue(os.path.exists(expected_path))
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry of this month's log matches what
    was just written
    """
    now = date.today()
    log_name = "predict-{}-{}.log".format(now.year, now.month)
    log_path = os.path.join(LOG_DIR, log_name)

    # write one arbitrary entry
    prediction = [0]
    update_predict_log(prediction, "00:00:02", 0.1, None, test=False)

    # the log is read back as CSV; compare against the string form
    newest = pd.read_csv(log_path)['y_pred'].iloc[-1]
    self.assertEqual(str(prediction), newest)
def test_03_predict(self):
    """
    Check that writing a prediction entry creates this month's log file
    """
    now = date.today()
    log_path = os.path.join(
        LOG_DIR, "predict-{}-{}.log".format(now.year, now.month))

    # start from a clean slate so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    update_predict_log([0], "00:00:02", 0.1, None, test=False)

    self.assertTrue(os.path.exists(log_path))
def model_predict(country, year, month, day, model, model_data, test=False):
    """
    Predict for a single date with an already-loaded model and log the result.

    Parameters
    ----------
    country : label written to the predict log and echoed in the result
    year, month, day : date parts; digit strings or non-negative integers
    model : fitted estimator exposing ``predict``; ``predict_proba`` is used
        only when the model also has a truthy ``probability`` attribute
    model_data : mapping with 'dates' (array of 'YYYY-MM-DD' strings),
        'X' (DataFrame, one row per date) and 'y' entries
    test : forwarded unchanged to ``update_predict_log``

    Returns
    -------
    dict with keys 'country', 'target_date', 'y_known', 'y_pred', 'y_proba'

    Raises
    ------
    Exception
        If the model is missing, a date part contains a non-digit, 'dates'
        and 'X' differ in length, or the date is not in 'dates'.
    """
    # start timer for runtime
    time_start = time.time()

    if not model:
        raise Exception("ERROR (model_predict) - model missing")

    # str() lets integer date parts through; the original only accepted strings
    for part in (year, month, day):
        if re.search(r"\D", str(part)):
            raise Exception("ERROR (model_predict) - invalid year, month or day")

    # sanity check: done before 'X' is indexed by a position found in 'dates'
    if model_data['dates'].shape[0] != model_data['X'].shape[0]:
        raise Exception("ERROR (model_predict) - dimensions mismatch")

    ## check date
    target_date = f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)}"
    if target_date not in model_data['dates']:
        raise Exception(f"ERROR (model_predict) - date {target_date} not in range "
                        f"{model_data['dates'][0]}-{model_data['dates'][-1]}")

    date_index = np.where(model_data['dates'] == target_date)[0][0]
    features = model_data['X'].iloc[[date_index]]
    y_known = model_data['y'][features.index]

    # make prediction and gather data for log entry
    y_pred = model.predict(features)
    y_proba = None
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(features)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update predict log
    update_predict_log(country, y_pred, y_proba, target_date, runtime,
                       MODEL_VERSION, test=test)

    return {'country': country, 'target_date': target_date,
            'y_known': y_known, 'y_pred': y_pred, 'y_proba': y_proba}
def model_predict(query, model=None):
    """
    Predict labels and probabilities for every row of ``query`` and log them.

    Parameters
    ----------
    query : dict or DataFrame
        Input features; a dict is converted with ``pd.DataFrame``.
    model : object, optional
        Estimator exposing ``predict`` and ``predict_proba``; loaded with
        ``model_load()`` when falsy.

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': ...}``

    Raises
    ------
    Exception
        If ``query`` is neither a dict nor a DataFrame.
    """
    ## start timer for runtime
    time_start = time.time()

    ## input checks
    if isinstance(query, dict):
        query = pd.DataFrame(query)
    elif not isinstance(query, pd.DataFrame):
        raise Exception(
            "ERROR (model_predict) - invalid input. {} was given".format(
                type(query)))

    ## load model if needed
    if not model:
        model = model_load()

    # NOTE: the former 1-D reshape branch was removed; ``query`` is always a
    # DataFrame here, so its shape is always two-dimensional.

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = model.predict_proba(query)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update the log file, one entry per row (full y_proba logged each time)
    for i, (_, row) in enumerate(query.iterrows()):
        update_predict_log(y_pred[i], y_proba, row.values.tolist(), runtime,
                           MODEL_VERSION, test=False)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    sample_query = ['united_states', 24, 'aavail_basic', 8]
    update_predict_log([0], [0.6, 0.4], sample_query, "00:00:02", 0.1,
                       test=True)

    self.assertTrue(os.path.exists(log_path))
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry in the test log can be read back
    """
    log_path = os.path.join("logs", "predict-test.log")

    # write one arbitrary entry
    expected = [15000]
    update_predict_log('ERIE', expected, None, '2018-01-05', "00:00:02",
                       0.1, test=True)

    # every logged prediction is parsed; the last one is the entry just added
    logged = pd.read_csv(log_path)
    parsed = list(map(literal_eval, logged['y_pred'].copy()))
    self.assertEqual(expected, parsed[-1])
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry in the test log can be read back
    """
    log_path = os.path.join("logs", "predict-test.log")

    # write one arbitrary entry
    expected = [0]
    sample_query = ['united_states', 24, 'aavail_basic', 8]
    update_predict_log(expected, [0.6, 0.4], sample_query, "00:00:02", 0.1,
                       test=True)

    # every logged prediction is parsed; the last one is the entry just added
    logged = pd.read_csv(log_path)
    parsed = list(map(literal_eval, logged['y_pred'].copy()))
    self.assertEqual(expected, parsed[-1])
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary entry: country, prediction, probability, date, runtime, version
    update_predict_log('ERIE', [15000], None, '2018-01-05', "00:00:02", 0.1,
                       test=True)

    self.assertTrue(os.path.exists(log_path))
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry in the test log can be read back
    """
    log_path = os.path.join("logs", "predict-test.log")

    # write one arbitrary entry
    expected = [0]
    update_predict_log('united states', expected, [0.6, 0.4], 24, "00:00:02",
                       0.1, test=True)

    # every logged prediction is parsed; the last one is the entry just added
    logged = pd.read_csv(log_path)
    parsed = list(map(literal_eval, logged['y_pred'].copy()))
    self.assertEqual(expected, parsed[-1])
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry of this month's log can be read back
    """
    now = date.today()
    log_path = os.path.join(
        "logs", "predict-{}-{}.log".format(now.year, now.month))

    # write one arbitrary entry
    expected = 40000
    update_predict_log('United Kingdom', expected, "('2019',)-('08',)-01",
                       "00:00:04", 0.1)

    # values are stringified before parsing, then the last one is compared
    logged = pd.read_csv(log_path)
    parsed = list(map(literal_eval, map(str, logged['y_pred'].copy())))
    self.assertEqual(expected, parsed[-1])
def test_03_predict(self):
    """
    Check that writing a prediction entry creates this month's log file
    """
    now = date.today()
    log_path = os.path.join(
        "logs", "predict-{}-{}.log".format(now.year, now.month))

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    update_predict_log('United Kingdom', 30000, "('2019',)-('08',)-01",
                       "00:00:03", 0.1)

    self.assertTrue(os.path.exists(log_path))
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary entry: country, prediction, probability, date, runtime, version
    update_predict_log('united_kingdom', [0], None, "2021-01-01", "00:00:02",
                       0.1, test=True)

    self.assertTrue(os.path.exists(log_path))
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary entry: country, prediction, probability, date, runtime, version
    update_predict_log('united states', [0], [0.6, 0.4], 24, "00:00:02", 0.1,
                       test=True)

    self.assertTrue(os.path.exists(log_path))
def test_04_predict(self):
    """
    Check that the newest 'y_pred' entry in the test log can be read back
    """
    log_path = os.path.join(log_dir, "predict-test.log")

    # write one arbitrary entry
    expected = [184154.256]
    update_predict_log('unittest', "all", expected, None, "2018-01-05",
                       "000:00:35", 0.1, test=True)

    # every logged prediction is parsed; the last one is the entry just added
    logged = pd.read_csv(log_path)
    parsed = list(map(literal_eval, logged['y_pred'].copy()))
    self.assertEqual(expected, parsed[-1])
def model_predict(country, year, month, day, model):
    """
    Forecast the next 30 periods with ``model`` and return their total

    Steps:
      - build the target date string from year/month/day (used for logging)
      - ask the model for a future dataframe extended by 30 periods
      - predict on the last 30 rows and sum the 'yhat' column
      - write the sum to the predict log

    Returns the summed 'yhat' value.
    """
    ## start timer for runtime
    started = time.time()

    ## assemble the date label that goes into the log
    padded_month = str(month).zfill(2)
    padded_day = str(day).zfill(2)
    target_date = "{}-{}-{}".format(year, padded_month, padded_day)
    print(target_date)

    ## make prediction and gather data for log entry
    horizon = model.make_future_dataframe(periods=30).tail(30)
    forecast = model.predict(horizon)
    forecast_view = forecast[['ds', 'yhat']]
    total = forecast_view['yhat'].sum()

    print('predictions:')
    print(forecast_view)
    print('30 day sum: ' + str(total))

    elapsed = time.time() - started
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)
    runtime = "%03d:%02d:%02d" % (hours, minutes, seconds)

    ## update the log file
    update_predict_log(country, total, target_date, runtime, MODEL_VERSION)

    return total
def model_predict(query, model=None, test=False):
    """
    Predict for every row of ``query`` and log one entry per row.

    Parameters
    ----------
    query : list or numpy array
        A list is wrapped into a one-row array; a 1-D array is reshaped to
        a single row.
    model : object, optional
        Estimator exposing ``predict``; loaded with ``model_load()`` when
        falsy. ``predict_proba`` is used only when the model also has a
        truthy ``probability`` attribute.
    test : bool
        Forwarded unchanged to ``update_predict_log``.

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': ...}``; 'y_proba' is None when no
        probabilities were computed.
    """
    # start timer for runtime
    time_start = time.time()

    # input checks
    if isinstance(query, list):
        query = np.array([query])

    # load model if needed
    if not model:
        model = model_load()

    # output checking
    if len(query.shape) == 1:
        query = query.reshape(1, -1)

    # make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = None
    # getattr: models with predict_proba but no 'probability' attribute
    # previously raised AttributeError here
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(query)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    # update the log file
    for i in range(query.shape[0]):
        # y_proba stays None when unavailable; indexing None raised TypeError
        row_proba = None if y_proba is None else y_proba[i]
        update_predict_log(y_pred[i], row_proba, runtime, query.shape,
                           MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary input values for the logger
    sample_query = ['uk', 30, 'aavail_basic', 2]
    update_predict_log([0], [0.9, 0.1], sample_query, "00:00:00", 2.0,
                       test=True)

    self.assertTrue(os.path.exists(log_path))
def test_04_predict(self):
    """
    ensure that content can be retrieved from log file
    """
    log_file = os.path.join("logs", "predict-test.log")

    ## update the log
    country = 'united_kingdom'
    y_pred = [0]
    y_proba = [0.6, 0.4]
    target_date = '2018-12-01'
    runtime = "00:00:02"
    model_version = 0.1

    # test=True (was False): this test reads "predict-test.log", the file the
    # sibling tests write with test=True, so the entry must be written in
    # test mode for the read-back to see it.
    update_predict_log(country, y_pred, y_proba, target_date, runtime,
                       model_version, test=True)

    df = pd.read_csv(log_file)
    logged_y_pred = [literal_eval(i) for i in df['y_pred'].copy()][-1]
    self.assertEqual(y_pred, logged_y_pred)
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary entry: country, prediction, probability, date, runtime, version
    update_predict_log('united_kingdom', [0], [0.6, 0.4], '2018-12-01',
                       "00:00:02", 0.1, test=True)

    self.assertTrue(os.path.exists(log_path))
def test_03_predict(self):
    """
    Check that writing a prediction entry creates the test log file
    """
    log_path = os.path.join("logs", "predict-test.log")

    # remove any earlier log so the existence check is meaningful
    if os.path.exists(log_path):
        os.remove(log_path)

    # arbitrary entry: prediction, probability, runtime, data shape, version
    update_predict_log([0], [0.6, 0.4], "00:00:02", (1, 10), 0.1, test=True)

    self.assertTrue(os.path.exists(log_path))
def model_predict(country, year, month, day, all_models=None, test=False,
                  all_data=None):
    """
    Predict for one country and date, then log the result.

    Parameters
    ----------
    country : key into ``all_models`` / ``all_data``
    year, month, day : date parts; digit strings or non-negative integers
    all_models : mapping of country -> fitted model, optional
    test : forwarded unchanged to ``update_predict_log``
    all_data : mapping of country -> {'dates': ..., 'X': ...}, optional
        New, backward-compatible parameter. Previously passing ``all_models``
        left ``all_data`` undefined and the function failed with NameError.

    Whatever is not supplied is obtained from ``model_load(training=False)``.

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': ...}``; 'y_proba' is None when no
        probabilities were computed.

    Raises
    ------
    Exception
        If no model exists for the country, a date part contains a
        non-digit, 'dates' and 'X' differ in length, or the date is not
        in 'dates'.
    """
    ## start timer for runtime
    time_start = time.time()

    ## load models and/or data if needed
    if not all_models or all_data is None:
        loaded_data, loaded_models = model_load(training=False)
        if all_data is None:
            all_data = loaded_data
        if not all_models:
            all_models = loaded_models

    ## input checks
    if country not in all_models:
        raise Exception(
            "ERROR (model_predict) - model for country '{}' could not be found"
            .format(country))

    # str() lets integer date parts through; the original only accepted strings
    for part in (year, month, day):
        if re.search(r"\D", str(part)):
            raise Exception(
                "ERROR (model_predict) - invalid year, month or day")

    ## load data
    model = all_models[country]
    data = all_data[country]

    ## sanity check: done before 'X' is indexed by a position found in 'dates'
    if data['dates'].shape[0] != data['X'].shape[0]:
        raise Exception("ERROR (model_predict) - dimensions mismatch")

    ## check date
    target_date = "{}-{}-{}".format(year, str(month).zfill(2),
                                    str(day).zfill(2))
    print(target_date)

    if target_date not in data['dates']:
        raise Exception(
            "ERROR (model_predict) - date {} not in range {}-{}".format(
                target_date, data['dates'][0], data['dates'][-1]))

    date_indx = np.where(data['dates'] == target_date)[0][0]
    query = data['X'].iloc[[date_indx]]

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = None
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(query)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update predict log
    update_predict_log(country, y_pred, y_proba, target_date, runtime,
                       MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def model_predict(country, year, month, day, all_models=None, test=False,
                  from_pickle=False, all_data=None):
    """
    Predict for one country and date, then log the result.

    Parameters
    ----------
    country : key into the models / data mappings
    year, month, day : date parts; digit strings or non-negative integers
    all_models : mapping of country -> fitted model, optional
    test : forwarded unchanged to ``update_predict_log``
    from_pickle : when True, data and models are both read from
        ``models/all_data_model-<version>.pickle`` and any passed-in
        ``all_models`` / ``all_data`` are ignored
    all_data : mapping of country -> {'dates': ..., 'X': ...}, optional
        New, backward-compatible parameter. Previously passing ``all_models``
        without ``from_pickle`` left ``all_data`` undefined (NameError).

    Returns
    -------
    dict
        ``{'y_pred': ..., 'y_proba': ...}``; 'y_proba' is None when no
        probabilities were computed.

    Raises
    ------
    Exception
        If no model exists for the country, a date part contains a
        non-digit, 'dates' and 'X' differ in length, or the date is not
        in 'dates'.
    """
    ## start timer for runtime
    time_start = time.time()

    # Loading all data & models from one pickle file is much faster than
    # model_load(), which helps the web app.
    if from_pickle:
        version_ = str(MODEL_VERSION).replace(".", "_")
        pickle_path = os.path.join("models",
                                   f"all_data_model-{version_}.pickle")
        # NOTE(security): pickle executes arbitrary code on load; this file
        # must only ever come from a trusted training run.
        with open(pickle_path, "rb") as handle:
            all_data, all_models = pickle.load(handle)
    elif not all_models or all_data is None:
        loaded_data, loaded_models = model_load(training=False)
        if all_data is None:
            all_data = loaded_data
        if not all_models:
            all_models = loaded_models

    ## input checks
    if country not in all_models:
        raise Exception(
            f"ERROR (model_predict) - model for country '{country}' could not be found"
        )

    # str() lets integer date parts through; the original only accepted strings
    for part in (year, month, day):
        if re.search(r"\D", str(part)):
            raise Exception(
                "ERROR (model_predict) - invalid year, month or day")

    ## load data
    model = all_models[country]
    data = all_data[country]

    ## sanity check: done before 'X' is indexed by a position found in 'dates'
    if data['dates'].shape[0] != data['X'].shape[0]:
        raise Exception("ERROR (model_predict) - dimensions mismatch")

    ## check date
    target_date = f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)}"
    print(target_date)

    if target_date not in data['dates']:
        raise Exception(
            f"ERROR (model_predict) - date {target_date} not in range {data['dates'][0]}-{data['dates'][-1]}"
        )

    date_indx = np.where(data['dates'] == target_date)[0][0]
    query = data['X'].iloc[[date_indx]]

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = None
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(query)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update predict log
    update_predict_log(country, y_pred, y_proba, target_date, runtime,
                       MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def model_predict():
    """
    Request handler: forecast 30 days of revenue for one country.

    Reads ``request.json``, which must be a dict with 'type' equal to
    'dict' and a 'query' holding 'country' and 'date' (format dd/mm/YYYY).
    Loads ``<country>-sales-arima-0_1.joblib`` from MODEL_DIR, predicts from
    the start date to 30 days later, logs the prediction, writes the
    projected series to ``<country>-preds.csv`` in PRED_DIR and responds
    with the mean projected value rounded to 3 decimals.

    Returns
    -------
    ``jsonify([])`` when the request body is missing or of an unsupported
    type, otherwise ``json_response(Predrevenue=<mean>)``.

    Raises
    ------
    JsonError
        If 'query', 'country' or 'date' is missing or malformed, or the
        country would escape MODEL_DIR.
    """
    payload = request.json

    ## input checking
    if not payload:
        print("ERROR: API (predict): did not receive request data")
        return jsonify([])

    # .get() + isinstance: a missing 'type' key or a non-dict body is treated
    # as an unsupported request instead of crashing with KeyError/TypeError
    if not isinstance(payload, dict) or payload.get('type') != 'dict':
        print("ERROR API (predict): only dict data types have been implemented")
        return jsonify([])

    num_periods = 30

    ## extract and validate the query; any malformed field is a client error
    try:
        data = payload['query']
        country = data['country']
        idx_start_date = datetime.strptime(data['date'], '%d/%m/%Y')
        str_country = country.lower()
    except (KeyError, TypeError, ValueError, AttributeError):
        raise JsonError(description='Invalid value')

    end_period = idx_start_date + timedelta(num_periods)

    ## select model based on country
    saved_model = str_country + "-" + "sales-arima-0_1.joblib"
    # 'country' comes from the request: refuse anything carrying a path
    # component so it cannot point outside MODEL_DIR.
    # NOTE(security): joblib.load unpickles the file; only trusted model
    # files may live in MODEL_DIR.
    if os.path.basename(saved_model) != saved_model:
        raise JsonError(description='Invalid value')
    model = joblib.load(os.path.join(MODEL_DIR, saved_model))

    print("... \npredicting")

    ## start timer for runtime
    time_start = time.time()

    preds, conf_int = model.predict(start=idx_start_date, end=end_period,
                                    return_conf_int=True)
    index_of_fc = pd.date_range(idx_start_date, periods=len(preds), freq='D')

    # make series for plotting purpose
    fitted_series = pd.Series(preds, index=index_of_fc)
    df_series = fitted_series.to_frame()
    df_series.columns = ["proj_sales"]
    avgrevpred = df_series.proj_sales.mean().round(3)

    ## gather data for log entry
    y_proba = None
    # getattr: models with predict_proba but no 'probability' attribute
    # previously raised AttributeError here
    if ('predict_proba' in dir(model)
            and getattr(model, 'probability', False)):
        y_proba = model.predict_proba(n_periods=1)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update the log file
    update_predict_log(preds, y_proba, data, runtime, MODEL_VERSION,
                       test=False)

    ## print results to outfile
    update_target(os.path.join(PRED_DIR, str_country + '-preds.csv'),
                  df_series, overwrite=True)

    return json_response(Predrevenue=avgrevpred)
def model_predict(date, country, df=None, model=None, test=False):
    """
    Fit an ARIMA(8, 0, 8) on daily summed 'price' and forecast up to ``date``.

    Parameters
    ----------
    date : str
        Target date in 'YYYY-MM-DD' format.
    country : str
        Country to filter on, or "all" to use every row.
    df : DataFrame, optional
        Must provide 'country', 'invoice_date' and 'price' columns; loaded
        with ``load_data()`` when None.
    model : ignored
        Kept for interface compatibility; a new ARIMA model is always fitted.
    test : bool
        Forwarded unchanged to ``update_predict_log``.

    Returns
    -------
    dict
        ``{'predicted': Series}`` indexed by day, starting at the last
        observed invoice date.

    Raises
    ------
    Exception
        If ``date`` is not a 'YYYY-MM-DD' string or ``country`` is not a str.
    """
    ## start timer for runtime
    time_start = time.time()
    print("model_predicted started", time_start)

    ## input checks
    try:
        datetime.strptime(date, '%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: a non-string date used to escape as a raw TypeError
        raise Exception(
            "ERROR (model_predict) - invalid input date {} was given".format(
                date))

    if not isinstance(country, str):
        raise Exception(
            "ERROR (model_predict) - invalid input country {} was given".
            format(country))

    if df is None:
        df = load_data()

    ## build the daily revenue series
    if country != "all":
        ts = df[df["country"] == country].sort_values(by="invoice_date")
    else:
        ts = df.sort_values(by="invoice_date")
    ts = ts.groupby("invoice_date")["price"].sum()

    # last observed day; the forecast starts here
    last_day = ts.index[-1].date()
    nsteps = days_between(str(last_day), date)

    ## make prediction and gather data for log entry
    model = ARIMA(ts, order=(8, 0, 8))
    results_ARIMA = model.fit(disp=-1)
    predicted = results_ARIMA.predict(start=len(ts),
                                      end=len(ts) + nsteps,
                                      exog=None,
                                      typ='linear',
                                      dynamic=False)
    range_dates = np.array(
        [last_day + timedelta(x) for x in range(nsteps + 1)],
        dtype='datetime64[D]')
    predicted = pd.Series(predicted.values, range_dates)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update the log file
    # Iterate the values directly: integer lookup on a date-indexed Series
    # relied on pandas' deprecated positional fallback.
    for value in predicted.values:
        update_predict_log(value, date, country, runtime, MODEL_VERSION,
                           test=test)

    return {'predicted': predicted}