def forecastWork(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to <modelDir>/hist.csv, predicts load with
    fc.neural_net_predictions, asks fc.shouldDispatchPS for each of the 365
    days whether to run the battery, and fills the output dictionary with
    accuracy, dispatch and financial figures.

    Args:
        modelDir: directory in which hist.csv is written.
        ind: input dictionary. Keys read here: cellCapacity, dischargeRate,
            chargeRate, cellQuantity, cellCost, demandCharge, retailCost,
            batteryEfficiency, dodFactor, projYears, batteryCycleLife,
            histCurve, confidence, discountRate.

    Returns:
        The output dictionary `o`.

    Raises:
        Exception: "CSV file is incorrect format." when the history CSV
            cannot be written/parsed, has fewer than 26280 rows, or does not
            end up with exactly 5 columns after month/dayOfYear are added.
    '''
    # chargeRate is parsed (so a missing/invalid value still fails early)
    # but is not used by the calculations below.
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = [
        float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate',
                                'cellQuantity', 'cellCost')
    ]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])
    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = float(ind['batteryCycleLife'])
    battCapacity = cellQuantity * cellCapacity * dodFactor
    o = {}

    def _ratio(numerator, denominator):
        # 0.0 stands in for "not computable", so a year in which the battery
        # is never dispatched does not abort the run with ZeroDivisionError.
        return numerator / float(denominator) if denominator else 0.0

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks rather than assert, so they survive `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('fewer than 26280 rows')
        if df.shape[1] != 5:
            raise ValueError('unexpected number of columns')
    except Exception:
        raise Exception("CSV file is incorrect format.")

    confidence = float(ind['confidence']) / 100

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    predictions = fc.neural_net_predictions(all_X, all_y)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    # One month value per day. The month column is hourly, so it has to be
    # sampled every 24 rows; zipping it directly against the daily lists
    # would pair day i with the month of hour i.
    dailyMonths = list(df['month'][-8760:])[::24]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, (load24, m) in enumerate(zip(dailyLoadPredictions, dailyMonths)):
        peak = max(load24)
        if fc.shouldDispatchPS(peak, m, df, confidence):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 0  # placeholder; no score is computed here
    o['testAccuracy'] = 0  # placeholder; no score is computed here

    # PRECISION AND RECALL
    # NOTE(review): the monthly peak day is searched over the whole history,
    # not only the final 8760 hours that `dispatched` covers - confirm intent.
    shouldHaveDispatched = [False] * 365
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        peakDay = int(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])
        # dayOfYear is 1-based and reaches 366 in leap years, while the flag
        # list is 0-based with 365 slots.
        shouldHaveDispatched[min(peakDay, 365) - 1] = True

    outcomes = list(zip(dispatched, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)
    o['precision'] = round(
        _ratio(truePositive, truePositive + falsePositive) * 100, 2)
    o['recall'] = round(
        _ratio(truePositive, truePositive + falseNegative) * 100, 2)
    o['number_of_dispatches'] = sum(1 for did in dispatched if did)
    o['MAE'] = round(
        sum([
            abs(l - m) / m * 100
            for l, m in zip(predictions, list(all_y[-8760:]))
        ]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #
    # Calculate monthHours: (first hour, last hour + 1) of every month within
    # the final 8760 rows.
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(first, last + 1) for (first, last) in zip(start, finish)]

    demand = list(all_y[-8760:])
    peakDemand = [max(demand[s:e]) for s, e in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:e]) for s, e in monthHours]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    # Battery State of Charge Graph
    # Turn the energy values into a percentage, with dodFactor considered.
    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]
    # Estimate number of cycles the battery went through: sum every drop in
    # SoC between consecutive hours, in percent, then divide by 100.
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1] for i in range(len(SoC) - 1)
        if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    o['batteryLife'] = _ratio(batteryCycleLife, cycleEquivalents)

    # Cash Flow Graph
    # Every projected year earns the same demand-charge benefit; the initial
    # investment is inserted as year 0.
    annualBenefit = sum(o['ps']) * demandCharge
    initialInvestment = cellCost * cellQuantity
    cashFlowCurve = [annualBenefit] * projYears
    cashFlowCurve.insert(0, -1 * initialInvestment)
    o['SPP'] = _ratio(initialInvestment, annualBenefit)
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i in range(len(cashFlowCurve))
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)
    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = _ratio(lcoeTotCost, cycleEquivalents * battCapacity)

    # Other
    o['startDate'] = '2011-01-01'
    o['stderr'] = ''
    o['stdout'] = 'Success'
    o['months'] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]
    return o
def workForecast(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to <modelDir>/hist.csv, fits an SGDRegressor on
    everything except the final 8760 rows, predicts those rows, asks
    fc.shouldDispatchPS for each day whether to dispatch, and fills the
    output dictionary with accuracy, dispatch and cost figures.

    Args:
        modelDir: directory in which hist.csv is written.
        ind: input dictionary. Keys read here: histCurve, confidence,
            unitDeviceCost, number_devices, electricityCost,
            demandChargeCost, unitUpkeepCost, projectionLength, discountRate
            (ind is also passed whole to vbat24hr and fc.pulp24hrVbat).

    Returns:
        The output dictionary `o`.

    Raises:
        Exception: "CSV file is incorrect format." when the history CSV
            cannot be written/parsed, has fewer than 26280 rows, or does not
            end up with exactly 5 columns after month/dayOfYear are added.
    '''
    o = {}

    # Grab data from CSV
    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks rather than assert, so they survive `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('fewer than 26280 rows')
        if df.shape[1] != 5:
            raise ValueError('unexpected number of columns')
    except Exception:
        raise Exception("CSV file is incorrect format.")

    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    P_lower, P_upper, E_UL = vbat24hr(ind, df['tempc'][-8760:])
    dailyPl = [P_lower[i:i + 24] for i in range(0, len(P_lower), 24)]
    dailyPu = [P_upper[i:i + 24] for i in range(0, len(P_upper), 24)]
    dailyEu = [E_UL[i:i + 24] for i in range(0, len(E_UL), 24)]
    # One month value per day. The month column is hourly, so it has to be
    # sampled every 24 rows; zipping it directly against the daily lists
    # would pair day i with the month of hour i.
    dailyMonths = list(df['month'][-8760:])[::24]
    confidence = float(ind['confidence']) / 100

    vbp, vbe = [], []
    dispatched_d = [False] * 365
    # Decide what days to dispatch
    zipped = zip(dailyLoadPredictions, dailyMonths, dailyPl, dailyPu, dailyEu)
    for i, (load, m, pl, pu, eu) in enumerate(zipped):
        peak = max(load)
        if fc.shouldDispatchPS(peak, m, df, confidence):
            dispatched_d[i] = True
            p, e = fc.pulp24hrVbat(ind, load, pl, pu, eu)
            vbp.extend(p)
            vbe.extend(e)
        else:
            vbp.extend([0] * 24)
            vbe.extend([0] * 24)

    ### TESTING FOR ACCURACY ###
    assert len(dailyPl) == 365
    assert all([len(i) == 24 for i in dailyPl])

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = list(predictions)
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    # NOTE(review): the monthly peak day is searched over the whole history,
    # not only the final 8760 hours that `dispatched_d` covers - confirm.
    shouldHaveDispatched = [False] * 365
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        peakDay = int(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])
        # dayOfYear is 1-based and reaches 366 in leap years, while the flag
        # list is 0-based with 365 slots.
        shouldHaveDispatched[min(peakDay, 365) - 1] = True

    outcomes = list(zip(dispatched_d, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)
    predictedPositive = truePositive + falsePositive
    actualPositive = truePositive + falseNegative
    o['confidence'] = ind['confidence']
    # 0.0 when the denominator is empty (e.g. nothing was dispatched), rather
    # than aborting with ZeroDivisionError.
    o['precision'] = round(
        truePositive / float(predictedPositive) * 100, 2
    ) if predictedPositive else 0.0
    o['recall'] = round(
        truePositive / float(actualPositive) * 100, 2
    ) if actualPositive else 0.0
    o['number_of_dispatches'] = sum(1 for did in dispatched_d if did)
    o['MAE'] = round(
        sum([abs(l - m) / m * 100
             for l, m in zip(predictions, list(y_test))]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #
    o['VBpower'], o['VBenergy'] = list(vbp), list(vbe)

    # Calculate monthHours: (first hour, last hour + 1) of every month within
    # the final 8760 rows.
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(first, last + 1) for (first, last) in zip(start, finish)]

    demand = list(y_test)
    peakDemand = [max(demand[s:e]) for s, e in monthHours]
    energyMonthly = [sum(demand[s:e]) for s, e in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:e]) for s, e in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:e]) for s, e in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    cellCost = float(ind['unitDeviceCost']) * float(ind['number_devices'])
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    annualEarnings = sum(o['savings']) - float(ind['unitUpkeepCost']) * float(
        ind['number_devices'])
    cashFlowList = [annualEarnings] * int(ind['projectionLength'])
    cashFlowList.insert(0, -1 * cellCost)

    # Net present value with the first cash flow at t = 0. Computed inline
    # because np.npv is no longer available in NumPy >= 1.20.
    rate = float(ind['discountRate']) / 100
    o['NPV'] = sum(cf / (1 + rate) ** t for t, cf in enumerate(cashFlowList))
    # 0.0 when there are no earnings to pay the investment back with.
    o['SPP'] = cellCost / annualEarnings if annualEarnings else 0.0
    o['netCashflow'] = cashFlowList
    o['cumulativeCashflow'] = [
        sum(cashFlowList[:i + 1]) for i in range(len(cashFlowList))
    ]

    o['stdout'] = 'Success'
    return o
def work(modelDir, inputDict):
    """ Run the model in its directory.

    Writes the two CSV inputs into modelDir, runs the loadForecast
    forecasters on them and collects the results.

    Args:
        modelDir: directory in which demandTemp.csv and hist.csv are written.
        inputDict: input dictionary. Keys read here: demandTemp, nn,
            simStartDate, upBound, lowBound, alpha, beta, and optionally
            katSpec (JSON string) and prophet.

    Returns:
        The output dictionary `outData`.

    Raises:
        Exception: "Neural Net CSV file is incorrect format." when the 'nn'
            CSV cannot be written/parsed or has fewer than 26280 rows; an
            HTML-formatted message when the demand/temperature CSV cannot be
            read as two columns.
    """
    outData = {}

    # write input file to modelDir sans carriage returns
    with open(pJoin(modelDir, "demandTemp.csv"), "w") as demandTempFile:
        demandTempFile.write(inputDict["demandTemp"].replace("\r", ""))

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(inputDict['nn'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
        # Explicit check rather than assert, so it survives `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('fewer than 26280 rows')
        if 'dates' not in df.columns:
            df['dates'] = df.apply(
                lambda x: dt(int(x['year']), int(x['month']), int(x['day']),
                             int(x['hour'])),
                axis=1)
    except Exception:
        raise Exception("Neural Net CSV file is incorrect format.")

    # neural net time
    all_X = loadForecast.makeUsefulDf(df)
    all_y = df["load"]
    nn_pred, nn_accuracy = loadForecast.neural_net_predictions(all_X, all_y)
    outData["actual_nn"] = df['load'][-8760:].tolist()

    # read it in as a list of lists
    try:
        with open(pJoin(modelDir, "demandTemp.csv")) as inFile:
            df = pd.read_csv(inFile, header=None)
        df.columns = ["load", "tempc"]
        df["dates"] = pd.date_range(start=inputDict["simStartDate"],
                                    freq="H",
                                    periods=df.shape[0])
        print(df.shape[0])
    except Exception:
        # Any parsing/shape failure is reported as a format problem; catching
        # only ZeroDivisionError here would let every real CSV error escape
        # without this message.
        errorMessage = (
            "CSV file is incorrect format. Please see valid format definition at "
            "<a target='_blank' href = 'https://github.com/dpinney/omf/wiki/Models-~-storagePeakShave#demand-file-csv-format'>"
            "\nOMF Wiki storagePeakShave - Demand File CSV Format</a>")
        raise Exception(errorMessage)

    rawData = df[["load", "tempc"]].fillna(0).values.tolist()
    del df

    # populate actual list (first column of every row)
    actual = [float(row[0]) for row in rawData]

    (forecasted, MAPE) = loadForecast.rollingDylanForecast(
        rawData, float(inputDict["upBound"]), float(inputDict["lowBound"]))
    (exp, exp_MAPE) = loadForecast.exponentiallySmoothedForecast(
        rawData, float(inputDict["alpha"]), float(inputDict["beta"]))

    # parse json params for nextDayPeakKatrina; fall back to no params when
    # the JSON is malformed
    try:
        params = json.loads(inputDict.get("katSpec", "{}"))
    except ValueError:
        params = {}
    pred_demand = loadForecast.nextDayPeakKatrinaForecast(
        rawData, inputDict["simStartDate"], modelDir, params)
    pred_demand = np.transpose(np.array(pred_demand)).tolist()

    # prophet forecast only runs when more than one partition is requested
    prophet_partitions = int(inputDict.get("prophet", 0))
    if prophet_partitions > 1:
        prophet, prophet_low, prophet_high = loadForecast.prophetForecast(
            rawData, inputDict["simStartDate"], modelDir,
            inputDict["prophet"])

    # write our outData
    outData["startDate"] = inputDict["simStartDate"]
    outData["actual"] = actual
    outData["forecasted"] = forecasted
    outData["doubleExp"] = exp
    outData["neuralNet"] = nn_pred
    outData["MAPE"] = "%0.2f%%" % (MAPE * 100)
    outData["MAPE_exp"] = "%0.2f%%" % (exp_MAPE * 100)
    outData["MAPE_nn"] = "%0.2f%%" % nn_accuracy["test"]
    outData["peakDemand"] = pred_demand
    if prophet_partitions > 1:
        outData["prophet"] = prophet
        outData["prophetLow"] = prophet_low
        outData["prophetHigh"] = prophet_high
    return outData
def forecastWork(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to <modelDir>/hist.csv, predicts load with
    fc.neural_net_predictions (optionally continuing from a model supplied
    base64-encoded in ind['model']), runs pulp24hrBattery for every day, and
    fills the output dictionary with accuracy and financial figures.

    Args:
        modelDir: directory in which hist.csv and the model files are written.
        ind: input dictionary. Keys read here: cellCapacity, dischargeRate,
            chargeRate, cellQuantity, cellCost, demandCharge, retailCost,
            batteryEfficiency, dodFactor, projYears, batteryCycleLife,
            histCurve, newModel, model (unless newModel == 'True'), epochs,
            discountRate.

    Returns:
        The output dictionary `o`.

    Raises:
        Exception: "CSV file is incorrect format." when the history CSV
            cannot be written/parsed or has fewer than 26280 rows.
    '''
    import base64
    import tensorflow as tf

    # chargeRate is parsed (so a missing/invalid value still fails early)
    # but is not used by the calculations below.
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = [
        float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate',
                                'cellQuantity', 'cellCost')
    ]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])
    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = float(ind['batteryCycleLife'])
    battCapacity = cellQuantity * cellCapacity * dodFactor
    o = {}

    def _ratio(numerator, denominator):
        # 0.0 stands in for "not computable" when the denominator is zero,
        # instead of padding every denominator with an arbitrary constant.
        return numerator / float(denominator) if denominator else 0.0

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
        # Explicit check rather than assert, so it survives `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('fewer than 26280 rows')
        if df.shape[1] == 6:
            # Six columns: build the timestamp from the year/month/day/hour
            # columns.
            df['dates'] = df.apply(
                lambda x: dt(int(x['year']), int(x['month']), int(x['day']),
                             int(x['hour'])),
                axis=1)
        else:
            df = pd.read_csv(pJoin(modelDir, 'hist.csv'),
                             parse_dates=['dates'])
            df['month'] = df.dates.dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
    except Exception:
        raise Exception("CSV file is incorrect format.")

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']

    if ind['newModel'] == 'True':
        model = None
    else:
        # ind['model'] holds the model file base64-encoded.
        with open(pJoin(modelDir, 'neural_net.h5'), 'wb') as modelFile:
            modelFile.write(base64.standard_b64decode(ind['model']))
        model = tf.keras.models.load_model(pJoin(modelDir, 'neural_net.h5'))

    predictions, accuracy = fc.neural_net_predictions(
        all_X,
        all_y,
        epochs=int(ind['epochs']),
        model=model,
        save_file=pJoin(modelDir, 'neural_net_model.h5'))

    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    weather = df['tempc'][-8760:]
    dailyWeatherPredictions = [
        weather[i:i + 24] for i in range(0, len(weather), 24)
    ]

    # run the battery every day for a year; the weather slices only bound
    # the number of days that are processed
    VB_power, VB_energy = [], []
    for load24, _temp24 in zip(dailyLoadPredictions, dailyWeatherPredictions):
        vbp, vbe = pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                   cellCapacity * cellQuantity, battEff)
        VB_power.extend(vbp)
        VB_energy.extend(vbe)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 100 - round(accuracy['train'], 1)
    o['testAccuracy'] = 100 - round(accuracy['test'], 1)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #
    # Calculate monthHours: (first hour, last hour + 1) of every month within
    # the final 8760 rows.
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(first, last + 1) for (first, last) in zip(start, finish)]

    demand = list(df['load'][-8760:])
    peakDemand = [max(demand[s:e]) for s, e in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:e]) for s, e in monthHours]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    # Battery state of charge as a percentage, with dodFactor considered.
    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]
    # Sum every drop in SoC between consecutive hours, in percent, /100.
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1] for i in range(len(SoC) - 1)
        if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    o['batteryLife'] = _ratio(batteryCycleLife, cycleEquivalents)

    # Cash Flow Graph
    # Every projected year earns the same demand-charge benefit; the initial
    # investment is inserted as year 0.
    annualBenefit = sum(o['ps']) * demandCharge
    initialInvestment = cellCost * cellQuantity
    cashFlowCurve = [annualBenefit] * projYears
    cashFlowCurve.insert(0, -1 * initialInvestment)
    o['SPP'] = _ratio(initialInvestment, annualBenefit)
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i in range(len(cashFlowCurve))
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)

    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = _ratio(lcoeTotCost, cycleEquivalents * battCapacity)

    # Other
    o['startDate'] = '2011-01-01'
    o['stderr'] = ''
    o['stdout'] = 'Success'
    return o
def work(modelDir, ind):
    ''' Model processing done here.

    Writes ind['histCurve'] to <modelDir>/hist.csv, appends up to three
    forecast days (using the 72 hourly temperatures in ind['tempCurve']),
    trains or reloads one model per forecast day through
    lf.neural_net_next_day, and returns the peak statistics and chart series.

    Side effect: `ind` is modified in place at the end - newModel is set to
    'False' and the three saved model files are stored base64-encoded
    together with their filenames.

    Args:
        modelDir: directory in which hist.csv and the .h5 files are written.
        ind: input dictionary. Keys read here: epochs, histCurve, tempCurve,
            newModel, and - when newModel == 'False' - one_day_model,
            two_day_model, three_day_model plus their *_filename entries.

    Returns:
        The output dictionary `o`.

    Raises:
        Exception: "Load CSV file is incorrect format." when the load CSV
            cannot be written/parsed or has fewer than 26280 rows; an
            Exception carrying ind['tempCurve'] when the temperature input
            is not exactly 72 numbers.
    '''
    epochs = int(ind['epochs'])
    o = {}  # See bottom of file for out's structure

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
        # Explicit check rather than assert, so it survives `python -O`.
        if df.shape[0] < 26280:
            raise ValueError('At least 3 years of data is required')
        if 'dates' not in df.columns:
            df['dates'] = df.apply(
                lambda x: dt(
                    int(x['year']),
                    int(x['month']),
                    int(x['day']),
                    int(x['hour'])),
                axis=1
            )
        else:
            # read_csv leaves an existing 'dates' column as strings, and the
            # .dt accessors used below need real datetimes.
            df['dates'] = pd.to_datetime(df['dates'])
    except Exception:
        raise Exception("Load CSV file is incorrect format.")

    try:
        weather = [float(i) for i in ind['tempCurve'].split('\n') if i != '']
        if len(weather) != 72:
            raise ValueError("weather csv in wrong format")
    except Exception:
        raise Exception(ind['tempCurve'])

    def _priorModel(day):
        # None trains from scratch; otherwise reload the file for this day.
        if ind['newModel'] == 'True':
            return None
        return tf.keras.models.load_model(
            pJoin(modelDir, ind[day + '_filename']))

    def _encodedFile(filename):
        # Return the file in modelDir as a base64 text string.
        with open(pJoin(modelDir, filename), 'rb') as modelFile:
            return base64.standard_b64encode(modelFile.read()).decode()

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    df = df.sort_values('dates')
    # keep only the calendar days that have exactly 24 rows
    rowsPerDay = dict(df.groupby(df.dates.dt.date)['dates'].count())
    df = df[df['dates'].dt.date.apply(lambda x: rowsPerDay[x] == 24)]

    df, tomorrow = lf.add_day(df, weather[:24])
    all_X, all_y = lf.makeUsefulDf(df, structure="3D")

    if ind['newModel'] == 'False':
        # Restore the previously saved models so they can be loaded below.
        for day in ['one_day_model', 'two_day_model', 'three_day_model']:
            with open(pJoin(modelDir, ind[day + '_filename']), 'wb') as modelFile:
                modelFile.write(base64.standard_b64decode(ind[day]))

    tomorrow_load, model, tomorrow_accuracy = lf.neural_net_next_day(
        all_X, all_y,
        epochs=epochs,
        save_file=pJoin(modelDir, 'one_day_model.h5'),
        model=_priorModel('one_day_model'),
        structure="3D"
    )

    o['tomorrow_load'] = tomorrow_load
    o['month_start'] = dt(tomorrow.year, tomorrow.month, 1).strftime("%A, %B %-d, %Y")
    o['forecast_start'] = tomorrow.strftime("%A, %B %-d, %Y")

    # Defaults used for any forecast day that falls in the next month.
    two_day_peak = 0
    two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
    two_day_predicted_load = []
    three_day_peak = 0
    three_day_load_accuracy = {'test': np.nan, 'train': np.nan}
    three_day_predicted_load = []

    # second day
    df, second_day = lf.add_day(df, weather[24:48])
    if second_day.month == tomorrow.month:
        all_X, all_y = lf.makeUsefulDf(df, hours_prior=48, noise=5, structure="3D")
        two_day_predicted_load, _, two_day_load_accuracy = lf.neural_net_next_day(
            all_X, all_y,
            epochs=epochs,
            hours_prior=48,
            save_file=pJoin(modelDir, 'two_day_model.h5'),
            model=_priorModel('two_day_model'),
            structure="3D"
        )
        two_day_peak = max(two_day_predicted_load)

        # third day (only attempted when the second day was in the month)
        df, third_day = lf.add_day(df, weather[48:72])
        if third_day.month == tomorrow.month:
            all_X, all_y = lf.makeUsefulDf(df, hours_prior=72, noise=15, structure="3D")
            three_day_predicted_load, _, three_day_load_accuracy = lf.neural_net_next_day(
                all_X, all_y,
                epochs=epochs,
                hours_prior=72,
                save_file=pJoin(modelDir, 'three_day_model.h5'),
                model=_priorModel('three_day_model'),
                structure="3D"
            )
            three_day_peak = max(three_day_predicted_load)

    tomorrow_peak = max(tomorrow_load)
    # Same calendar month in every other year: hourly rows and daily peaks.
    hourly = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year)]
    m = hourly.groupby(hourly.dates.dt.date)['load'].max()
    o['quantile'] = round(m[m < tomorrow_peak].shape[0]/float(m.shape[0])*100, 2)
    o['predicted_peak'] = [m.median(), highest_peak_this_month(df, tomorrow),
                           tomorrow_peak, two_day_peak, three_day_peak]
    o['predicted_peak_limits'] = [
        [m.min(), m.max()],
        [0, 0],
        [tomorrow_peak*(1 + tomorrow_accuracy['test']*.01), tomorrow_peak*(1 - tomorrow_accuracy['test']*.01)],
        [two_day_peak*(1 + two_day_load_accuracy['test']*.01), two_day_peak*(1 - two_day_load_accuracy['test']*.01)],
        [three_day_peak*(1 + three_day_load_accuracy['test']*.01), three_day_peak*(1 - three_day_load_accuracy['test']*.01)]
    ]
    previous_months = [{
        'year': y,
        'load': hourly[hourly['year'] == y]['load'].tolist()
    } for y in hourly.year.unique()]

    # ---------------------- FORMAT FOR DISPLAY ------------------------------- #
    series = [{
        'name': prev['year'].item(),
        'color': 'lightgrey',
        'data': prev['load'],
        'type': 'line',
        'opacity': .05,
        'enableMouseTracking': False
    } for prev in previous_months]

    all_load = tomorrow_load + two_day_predicted_load + three_day_predicted_load
    load_leading_up = df[(df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)]['load'].tolist()
    # NOTE(review): the offsets below assume 72 forecast hours, but all_load
    # is shorter when the 2nd/3rd day falls in the next month - confirm.
    padding = [None]*(len(load_leading_up) - 72)
    series.append({'name': tomorrow.year, 'color': 'black', 'data': load_leading_up[:-72], 'type': 'line'})
    series.append({'name': 'forecast', 'color': 'blue', 'data': padding + all_load, 'type': 'line', 'zIndex': 5})

    # add uncertainty (one percentage per forecast hour)
    uncertainty = [2.02, 2.41, 2.78, 2.91, 3.48, 4.02, 4.2, 3.96, 3.63, 3.68, 4.19, 4.45, 4.77, 4.94, 4.79, 5.22, 5.58, 5.32, 5.44, 4.85, 5.05, 5.51, 5.71, 5.96, 7.84, 8.44, 8.96, 9.06, 8.81, 8.53, 8.4, 8.06, 7.33, 6.5, 6.15, 6.23, 6.43, 6.34, 6.84, 6.76, 7.17, 7.2, 6.93, 6.83, 6.71, 7.39, 8.49, 9.24, 9.36, 10.64, 9.95, 9.4, 9.6, 9.28, 8.52, 8.78, 8.71, 8.59, 8.34, 8.81, 9.12, 9.53, 10.3, 10.67, 10.89, 10.47, 9.67, 8.95, 8.79, 9.18, 9.92, 10.25]
    print(tomorrow_accuracy['test'])

    series.append({
        'name': 'uncertainty',
        'color': '#b3b3ff',
        'data': padding + [x*u*.01*2 for u, x in zip(uncertainty, all_load)],
    })
    series.append({
        'id': 'transparent',
        'color': 'rgba(255,255,255,0)',
        'data': padding + [x*(1-u*.01) for u, x in zip(uncertainty, all_load)]
    })

    o['previous_months'] = series
    o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
    o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
    o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
    o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
    o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
    o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
    o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
    o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)

    o['peak_percent_chance'] = peak_likelihood(
        hist=highest_peak_this_month(df[:-48], tomorrow),
        tomorrow=tomorrow_peak,
        tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
        two_day=two_day_peak,
        two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
        three_day=three_day_peak,
        three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
    )

    o['stderr'] = ''

    # NOTE(review): the two/three day files are only written above when
    # those days are in tomorrow's month (or were restored from ind) -
    # confirm they always exist at this point.
    one_day_model = _encodedFile('one_day_model.h5')
    two_day_model = _encodedFile('two_day_model.h5')
    three_day_model = _encodedFile('three_day_model.h5')

    # re-input values (i.e. modify the mutable dictionary that is used in
    # heavyprocessing). Plain values, no trailing commas: a trailing comma
    # would store a 1-tuple and break the string comparisons and file paths
    # on the next run.
    ind['newModel'] = 'False'
    ind['one_day_model'] = one_day_model
    ind['one_day_model_filename'] = 'one_day_model.h5'
    ind['two_day_model'] = two_day_model
    ind['two_day_model_filename'] = 'two_day_model.h5'
    ind['three_day_model'] = three_day_model
    ind['three_day_model_filename'] = 'three_day_model.h5'

    return o
def work(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['historicalData'] to <modelDir>/hist.csv, fits an
    SGDRegressor on everything except the final 8760 rows, predicts those
    rows, asks fc.shouldDispatchDeferral for each day whether to run the
    battery, and fills the output dictionary with accuracy, dispatch and
    cost figures.

    Args:
        modelDir: directory in which hist.csv is written.
        ind: input dictionary. Keys read here: cellCapacity, dischargeRate,
            chargeRate, cellQuantity, cellCost, batteryEfficiency,
            projectionLength, historicalData, goal, transformerThreshold,
            confidence, electricityCost, demandChargeCost, discountRate.

    Returns:
        The output dictionary `o`.

    Raises:
        Exception: "CSV file is incorrect format." when the history CSV
            cannot be written/parsed, has fewer than 26280 rows, or does not
            end up with exactly 5 columns after month/dayOfYear are added.
    '''
    # chargeRate is parsed (so a missing/invalid value still fails early)
    # but is not used by the calculations below.
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = [
        float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate',
                                'cellQuantity', 'cellCost')
    ]
    battEff = float(ind.get("batteryEfficiency")) / 100.0
    projYears = int(ind.get('projectionLength'))
    o = {}

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as histFile:
            histFile.write(ind['historicalData'])
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks rather than assert, so they survive `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('fewer than 26280 rows')
        if df.shape[1] != 5:
            raise ValueError('unexpected number of columns')
    except Exception:
        # Any write/parse/shape failure is reported as a format problem;
        # catching only ZeroDivisionError would never produce this message.
        raise Exception("CSV file is incorrect format.")

    # retrieve goal
    goal = ind['goal']
    threshold = float(ind['transformerThreshold']) * 1000
    confidence = float(ind['confidence']) / 100

    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]

    # Collect data necessary for dispatch calculations
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, load24 in enumerate(dailyLoadPredictions):
        peak = max(load24)
        if fc.shouldDispatchDeferral(peak, df, confidence, threshold):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = list(predictions)
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    # NOTE(review): the monthly peak day is searched over the whole history,
    # not only the final 8760 hours that `dispatched` covers - confirm intent.
    shouldHaveDispatched = [False] * 365
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        peakDay = int(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])
        # dayOfYear is 1-based and reaches 366 in leap years, while the flag
        # list is 0-based with 365 slots.
        shouldHaveDispatched[min(peakDay, 365) - 1] = True

    outcomes = list(zip(dispatched, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)
    predictedPositive = truePositive + falsePositive
    actualPositive = truePositive + falseNegative
    # 0.0 when the denominator is empty (e.g. nothing was dispatched), rather
    # than aborting with ZeroDivisionError.
    o['precision'] = round(
        truePositive / float(predictedPositive) * 100, 2
    ) if predictedPositive else 0.0
    o['recall'] = round(
        truePositive / float(actualPositive) * 100, 2
    ) if actualPositive else 0.0
    o['number_of_dispatches'] = sum(1 for did in dispatched if did)
    o['MAE'] = round(
        sum([abs(l - m) / m * 100
             for l, m in zip(predictions, list(y_test))]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #
    o['VBpower'], o['VBenergy'] = list(VB_power), list(VB_energy)

    # Calculate monthHours: (first hour, last hour + 1) of every month within
    # the final 8760 rows.
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(first, last + 1) for (first, last) in zip(start, finish)]

    demand = list(y_test)
    peakDemand = [max(demand[s:e]) for s, e in monthHours]
    energyMonthly = [sum(demand[s:e]) for s, e in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:e]) for s, e in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:e]) for s, e in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    initInvestment = cellCost * cellQuantity
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    annualEarnings = sum(o['savings'])
    cashFlowList = [annualEarnings] * projYears
    cashFlowList.insert(0, -1 * initInvestment)

    # Net present value with the first cash flow at t = 0. Computed inline
    # because np.npv is no longer available in NumPy >= 1.20.
    rate = float(ind['discountRate']) / 100
    o['NPV'] = sum(cf / (1 + rate) ** t for t, cf in enumerate(cashFlowList))
    # 0.0 when there are no earnings to pay the investment back with.
    o['SPP'] = initInvestment / annualEarnings if annualEarnings else 0.0
    o['netCashflow'] = cashFlowList
    o['cumulativeCashflow'] = [
        sum(cashFlowList[:i + 1]) for i in range(len(cashFlowList))
    ]

    # Warn when the demand after battery dispatch still goes above the
    # transformer threshold at any hour.
    thresholdExceeded = any(d > threshold for d in demandAdj)
    o['dataCheck'] = 'Threshold exceeded' if thresholdExceeded and goal == 'deferral' else ''
    o['transformerThreshold'] = threshold if goal == 'deferral' else None
    o['stdout'] = 'Success'
    return o
def _format_long_date(moment):
    ''' Render a date as e.g. "Monday, March 5, 2018".

    The day of month is formatted by hand because the "%-d" strftime code
    (day without zero padding) is a C-library extension that is not
    available on every platform.
    '''
    return '{}{}{}'.format(
        moment.strftime('%A, %B '), moment.day, moment.strftime(', %Y'))


def work(modelDir, ind):
    ''' Model processing done here.

    Writes the load history to <modelDir>/hist.csv, extends it with up to
    three forecast days, trains one network per forecast horizon (saved as
    neural_net_{1,2,3}day.h5 in modelDir) and collects the predicted peaks,
    their accuracy bands and the chart series in the output dict.

    modelDir -- directory used for hist.csv and the saved network files.
    ind -- model inputs. Reads 'epochs', 'histCurve' (CSV text with at least
        26280 rows; the 'load', 'month' and 'year' columns are used below,
        plus either 'dates' or 'year'/'month'/'day'/'hour') and 'tempCurve'
        (72 newline-separated temperatures, 24 per forecast day).

    Returns the output dict `o`.
    Raises Exception when the load history or the temperature curve cannot
    be parsed or has the wrong length.
    '''
    epochs = int(ind['epochs'])
    o = {}  # See bottom of file for out's structure

    hist_path = pJoin(modelDir, 'hist.csv')
    try:
        with open(hist_path, 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(hist_path)
        # Explicit check instead of assert: asserts vanish under `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('load history has fewer than 26280 rows')
        if 'dates' not in df.columns:
            df['dates'] = df.apply(
                lambda x: dt(
                    int(x['year']),
                    int(x['month']),
                    int(x['day']),
                    int(x['hour'])),
                axis=1
            )
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a CSV problem.
        raise Exception("Load CSV file is incorrect format.")

    temp_curve = ind.get('tempCurve')
    try:
        # Blank lines (e.g. a trailing newline) are ignored rather than
        # failing the float conversion.
        weather = [
            float(value) for value in temp_curve.split('\n') if value.strip()
        ]
    except Exception:
        weather = None
    if weather is None or len(weather) != 72:
        raise Exception(
            'weather csv in wrong format (expected 72 temperature values): '
            '{}'.format(temp_curve))

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    df, tomorrow = lf.add_day(df, weather[:24])
    all_X = lf.makeUsefulDf(df)
    all_y = df['load']

    # load prediction
    tomorrow_load, _, tomorrow_accuracy = lf.neural_net_next_day(
        all_X, all_y,
        epochs=epochs,
        save_file=pJoin(modelDir, 'neural_net_1day.h5'))
    o['tomorrow_load'] = tomorrow_load
    o['month_start'] = _format_long_date(dt(tomorrow.year, tomorrow.month, 1))
    o['forecast_start'] = _format_long_date(tomorrow)

    # Days two and three are only forecast while they fall in the same month
    # as tomorrow; otherwise their peak stays 0 and their accuracy NaN.
    two_day_peak = 0
    two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
    three_day_peak = 0
    three_day_load_accuracy = {'test': np.nan, 'train': np.nan}

    # second day
    df, second_day = lf.add_day(df, weather[24:48])
    if second_day.month == tomorrow.month:
        all_X = lf.makeUsefulDf(df, hours_prior=48, noise=5)
        all_y = df['load']
        two_day_predicted_load, _, two_day_load_accuracy = lf.neural_net_next_day(
            all_X, all_y,
            epochs=epochs,
            hours_prior=48,
            save_file=pJoin(modelDir, 'neural_net_2day.h5'))
        two_day_peak = max(two_day_predicted_load)

        # third day (only attempted when the second day was forecast)
        df, third_day = lf.add_day(df, weather[48:72])
        if third_day.month == tomorrow.month:
            all_X = lf.makeUsefulDf(df, hours_prior=72, noise=15)
            all_y = df['load']
            three_day_predicted_load, _, three_day_load_accuracy = lf.neural_net_next_day(
                all_X, all_y,
                epochs=epochs,
                hours_prior=72,
                save_file=pJoin(modelDir, 'neural_net_3day.h5'))
            three_day_peak = max(three_day_predicted_load)

    tomorrow_peak = max(tomorrow_load)

    # Rows from the same calendar month in every other year.
    m = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year)]
    o['quantile'] = round(
        m[m['load'] < tomorrow_peak].shape[0] / float(m.shape[0]) * 100, 2)
    o['predicted_peak'] = [
        m['load'].median(),
        highest_peak_this_month(df, tomorrow),
        tomorrow_peak,
        two_day_peak,
        three_day_peak
    ]

    def band(peak, accuracy):
        # [peak + test-error %, peak - test-error %]
        return [
            peak*(1 + accuracy['test']*.01),
            peak*(1 - accuracy['test']*.01)
        ]

    o['predicted_peak_limits'] = [
        [m['load'].min(), m['load'].max()],
        [0, 0],
        band(tomorrow_peak, tomorrow_accuracy),
        band(two_day_peak, two_day_load_accuracy),
        band(three_day_peak, three_day_load_accuracy)
    ]

    # hard-code the input for highcharts
    o['cats_pred'] = list(range(744))  # TODO: FIX THIS (744 = 31 days * 24)

    # One faded grey line per previous year of this month.
    series = [{
        'name': y,
        'color': 'lightgrey',
        'data': m[m['year'] == y]['load'].tolist(),
        'type': 'line',
        'opacity': .05,
        'enableMouseTracking': False
    } for y in m.year.unique()]

    load_leading_up = df[
        (df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)
    ]['load'].tolist()
    # NOTE(review): the 72 below presumably strips three appended forecast
    # days; when a later day falls in the next month fewer rows of this
    # month were appended -- confirm against lf.add_day.
    series.append({
        'name': tomorrow.year,
        'color': 'black',
        'data': load_leading_up[:-72],
        'type': 'line'
    })
    series.append({
        'name': 'forecast',
        'color': 'blue',
        'data': [None]*(len(load_leading_up) - 72) + o['tomorrow_load'],
        'type': 'line'
    })
    o['previous_months'] = series

    o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
    o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
    o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
    o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
    o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
    o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
    o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
    o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)

    o['peak_percent_chance'] = peak_likelihood(
        hist=highest_peak_this_month(df[:-48], tomorrow),
        tomorrow=tomorrow_peak,
        tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
        two_day=two_day_peak,
        two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
        three_day=three_day_peak,
        three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
    )

    o['stderr'] = ''
    return o