Exemplos de makeUsefulDf em Python, exemplos de omf.forecast.makeUsefulDf em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: nn_storagePeakShave.py Projeto: sammatuba/omf

def forecastWork(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to hist.csv inside modelDir, predicts load with
    fc.neural_net_predictions, decides per day whether to dispatch the
    battery, and returns a dict of accuracy, dispatch and financial outputs.

    Args:
        modelDir: directory where hist.csv is written.
        ind: input dict; the keys read here are cellCapacity, dischargeRate,
            chargeRate, cellQuantity, cellCost, demandCharge, retailCost,
            batteryEfficiency, dodFactor, projYears, batteryCycleLife,
            histCurve, confidence and discountRate.

    Returns:
        dict of model outputs (the `o` built below).

    Raises:
        Exception: if the historical CSV cannot be written, parsed, or does
            not have at least 26280 rows and exactly 5 columns once month
            and dayOfYear are added.
    '''

    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
     [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])

    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = float(ind['batteryCycleLife'])
    battCapacity = cellQuantity * cellCapacity * dodFactor

    o = {}

    try:
        histPath = pJoin(modelDir, 'hist.csv')
        with open(histPath, 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(histPath, parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks (not asserts) so validation still runs under -O.
        # 26280 hourly rows == 3 years.
        if df.shape[0] < 26280 or df.shape[1] != 5:
            raise ValueError('unexpected CSV shape')
    except Exception:
        raise Exception("CSV file is incorrect format.")

    confidence = float(ind['confidence']) / 100

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    predictions = fc.neural_net_predictions(all_X, all_y)

    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    # The month column is hourly; take one value per 24 hours so that each
    # daily load slice is paired with the month of that same day.
    dailyMonths = list(df['month'][-8760:])[::24]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, (load24, m) in enumerate(zip(dailyLoadPredictions, dailyMonths)):
        peak = max(load24)
        if fc.shouldDispatchPS(peak, m, df, confidence):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 0  #round(model.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = 0  #round(model.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    # Day of year on which each calendar month's maximum load occurred.
    maxDays = []
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        maxDays.append(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        # dayOfYear is 1-based while `dispatched` is indexed from 0; day 366
        # of a leap year has no slot and is skipped.
        # NOTE(review): assumes the final 8760 hours start on January 1 - confirm.
        slot = int(day) - 1
        if 0 <= slot < len(shouldHaveDispatched):
            shouldHaveDispatched[slot] = True

    outcomes = list(zip(dispatched, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)

    def asPercent(numerator, denominator):
        # A zero denominator means there was nothing to score; report 0
        # instead of raising ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / float(denominator) * 100, 2)

    o['precision'] = asPercent(truePositive, truePositive + falsePositive)
    o['recall'] = asPercent(truePositive, truePositive + falseNegative)
    o['number_of_dispatches'] = len([i for i in dispatched if i])

    demand = list(all_y[-8760:])
    # Mean absolute error expressed as a percentage of the actual load.
    o['MAE'] = round(
        sum([
            abs(l - m) / m * 100
            for l, m in zip(predictions, demand)
        ]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    # Calculate monthHours: [start, end) hour offsets of each month within
    # the final 8760 rows.
    lastYear = df[-8760:].copy()
    lastYear.reset_index(inplace=True)
    lastYear['hour'] = list(lastYear.index)
    start = list(lastYear.groupby('month').first()['hour'])
    finish = list(lastYear.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:f]) for s, f in monthHours]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    # Battery State of Charge Graph
    # Turn dc's SoC into a percentage, with dodFactor considered.
    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]

    # Estimate number of cycles the battery went through. Sums the percent of SoC.
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1]
        for i in range(len(SoC) - 1) if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    # NOTE(review): cycleEquivalents is 0 when the state of charge never
    # drops, which makes this line and the LCOE below divide by zero.
    o['batteryLife'] = batteryCycleLife / cycleEquivalents

    # Cash Flow Graph
    # Year 0 is the initial investment; every projected year then earns the
    # summed monthly peak reduction times the demand charge.
    annualBenefit = sum(o['ps']) * demandCharge
    cashFlowCurve = [annualBenefit for _ in range(projYears)]
    cashFlowCurve.insert(0, -1 * cellCost * cellQuantity)
    o['SPP'] = (cellCost * cellQuantity) / annualBenefit
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i in range(len(cashFlowCurve))
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)

    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = lcoeTotCost / (cycleEquivalents * battCapacity)

    # Other
    o['startDate'] = '2011-01-01'  # dc[0]['datetime'].isoformat()
    o['stderr'] = ''
    # Seemingly unimportant. Ask permission to delete.
    o['stdout'] = 'Success'
    o['months'] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]

    return o

Exemplo n.º 2

0

Exibir arquivo

Arquivo: f_vbatDispatch.py Projeto: sammatuba/omf

def workForecast(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to hist.csv inside modelDir, fits an
    SGDRegressor on everything but the final 8760 rows, predicts those final
    rows, decides per day whether to dispatch the virtual battery, and
    returns a dict of accuracy, dispatch and financial outputs.

    Args:
        modelDir: directory where hist.csv is written.
        ind: input dict; the keys read here are histCurve, confidence,
            unitDeviceCost, number_devices, electricityCost,
            demandChargeCost, unitUpkeepCost, projectionLength and
            discountRate (ind is also passed whole to vbat24hr and
            fc.pulp24hrVbat).

    Returns:
        dict of model outputs (the `o` built below).

    Raises:
        Exception: if the historical CSV cannot be written, parsed, or does
            not have at least 26280 rows and exactly 5 columns once month
            and dayOfYear are added.
    '''
    o = {}

    # Grab data from CSV,
    try:
        histPath = pJoin(modelDir, 'hist.csv')
        with open(histPath, 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(histPath, parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks (not asserts) so validation still runs under -O.
        # 26280 hourly rows == 3 years.
        if df.shape[0] < 26280 or df.shape[1] != 5:
            raise ValueError('unexpected CSV shape')
    except Exception:
        raise Exception("CSV file is incorrect format.")

    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]

    P_lower, P_upper, E_UL = vbat24hr(ind, df['tempc'][-8760:])
    dailyPl = [P_lower[i:i + 24] for i in range(0, len(P_lower), 24)]
    dailyPu = [P_upper[i:i + 24] for i in range(0, len(P_upper), 24)]
    dailyEu = [E_UL[i:i + 24] for i in range(0, len(E_UL), 24)]

    confidence = float(ind['confidence']) / 100
    # The month column is hourly; take one value per 24 hours so that each
    # daily slice is paired with the month of that same day.
    dailyMonths = list(df['month'][-8760:])[::24]

    vbp, vbe = [], []
    dispatched_d = [False] * 365
    # Decide what days to dispatch
    zipped = zip(dailyLoadPredictions, dailyMonths, dailyPl, dailyPu, dailyEu)
    for i, (load, m, pl, pu, eu) in enumerate(zipped):
        peak = max(load)
        if fc.shouldDispatchPS(peak, m, df, confidence):
            dispatched_d[i] = True
            p, e = fc.pulp24hrVbat(ind, load, pl, pu, eu)
            vbp.extend(p)
            vbe.extend(e)
        else:
            vbp.extend([0] * 24)
            vbe.extend([0] * 24)

    ### TESTING FOR ACCURACY ###
    assert len(dailyPl) == 365
    assert all([len(i) == 24 for i in dailyPl])

    VB_power, VB_energy = vbp, vbe

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #

    # Reuse the predictions computed above instead of predicting twice.
    o['predictedLoad'] = list(predictions)
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    # Day of year on which each calendar month's maximum load occurred.
    maxDays = []
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        maxDays.append(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        # dayOfYear is 1-based while `dispatched_d` is indexed from 0; day
        # 366 of a leap year has no slot and is skipped.
        # NOTE(review): assumes the final 8760 hours start on January 1 - confirm.
        slot = int(day) - 1
        if 0 <= slot < len(shouldHaveDispatched):
            shouldHaveDispatched[slot] = True

    outcomes = list(zip(dispatched_d, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)

    def asPercent(numerator, denominator):
        # A zero denominator means there was nothing to score; report 0
        # instead of raising ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / float(denominator) * 100, 2)

    o['confidence'] = ind['confidence']
    o['precision'] = asPercent(truePositive, truePositive + falsePositive)
    o['recall'] = asPercent(truePositive, truePositive + falseNegative)
    o['number_of_dispatches'] = len([i for i in dispatched_d if i])
    # Mean absolute error expressed as a percentage of the actual load.
    o['MAE'] = round(
        sum([abs(l - m) / m * 100
             for l, m in zip(predictions, list(y_test))]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    o['VBpower'], o['VBenergy'] = list(VB_power), list(VB_energy)

    # Calculate monthHours: [start, end) hour offsets of each month within
    # the final 8760 rows.
    lastYear = df[-8760:].copy()
    lastYear.reset_index(inplace=True)
    lastYear['hour'] = list(lastYear.index)
    start = list(lastYear.groupby('month').first()['hour'])
    finish = list(lastYear.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(y_test)
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    energyMonthly = [sum(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:f]) for s, f in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:f]) for s, f in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    numDevices = float(ind['number_devices'])
    cellCost = float(ind['unitDeviceCost']) * numDevices
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    # Yearly savings net of upkeep; year 0 of the cash flow is the purchase.
    annualEarnings = sum(o['savings']) - float(ind['unitUpkeepCost']) * numDevices
    cashFlowList = [annualEarnings] * int(ind['projectionLength'])
    cashFlowList.insert(0, -1 * cellCost)

    # NOTE(review): np.npv was removed from NumPy in 1.20 (moved to the
    # numpy-financial package); this call needs an older NumPy.
    o['NPV'] = np.npv(float(ind['discountRate']) / 100, cashFlowList)
    o['SPP'] = cellCost / annualEarnings
    o['netCashflow'] = cashFlowList
    o['cumulativeCashflow'] = [
        sum(cashFlowList[:i + 1]) for i in range(len(cashFlowList))
    ]

    o['stdout'] = 'Success'
    return o

Exemplo n.º 3

0

Exibir arquivo

def work(modelDir, inputDict):
    """ Run the model in its directory.

    Writes the two uploaded CSVs (inputDict["demandTemp"] and
    inputDict["nn"]) into modelDir, runs the neural-net, rolling,
    exponentially-smoothed, next-day-peak and (optionally) prophet forecasts
    from loadForecast, and collects their results.

    Args:
        modelDir: directory where demandTemp.csv and hist.csv are written.
        inputDict: input dict; the keys read here are demandTemp, nn,
            simStartDate, upBound, lowBound, alpha, beta, and the optional
            katSpec and prophet.

    Returns:
        dict with the actual series, each forecast, and formatted MAPE
        strings.

    Raises:
        Exception: if either CSV cannot be read in the expected format.
    """
    outData = {}

    # write input file to modelDir sans carriage returns
    with open(pJoin(modelDir, "demandTemp.csv"), "w") as demandTempFile:
        demandTempFile.write(inputDict["demandTemp"].replace("\r", ""))

    try:
        histPath = pJoin(modelDir, 'hist.csv')
        with open(histPath, 'w') as f:
            f.write(inputDict['nn'].replace('\r', ''))
        df = pd.read_csv(histPath)
        # Explicit check (not an assert) so validation still runs under -O.
        # 26280 hourly rows == 3 years.
        if df.shape[0] < 26280:
            raise ValueError('fewer than 3 years of data')
        if 'dates' not in df.columns:
            # Build timestamps from the separate year/month/day/hour columns.
            df['dates'] = df.apply(lambda x: dt(int(x['year']), int(x[
                'month']), int(x['day']), int(x['hour'])),
                                   axis=1)
    except Exception:
        raise Exception("Neural Net CSV file is incorrect format.")

    # neural net time
    all_X = loadForecast.makeUsefulDf(df)
    all_y = df["load"]
    nn_pred, nn_accuracy = loadForecast.neural_net_predictions(all_X, all_y)
    outData["actual_nn"] = df['load'][-8760:].tolist()

    # read it in as a list of lists
    try:
        with open(pJoin(modelDir, "demandTemp.csv")) as inFile:
            df = pd.read_csv(inFile, header=None)
            df.columns = ["load", "tempc"]
            df["dates"] = pd.date_range(start=inputDict["simStartDate"],
                                        freq="H",
                                        periods=df.shape[0])
            # Parenthesised so this parses on both Python 2 and Python 3.
            print(df.shape[0])
    except Exception:
        # The original caught only ZeroDivisionError, which a malformed CSV
        # never raises, so this message could not be reached.
        errorMessage = "CSV file is incorrect format. Please see valid format definition at <a target='_blank' href = 'https://github.com/dpinney/omf/wiki/Models-~-storagePeakShave#demand-file-csv-format'>\nOMF Wiki storagePeakShave - Demand File CSV Format</a>"
        raise Exception(errorMessage)

    # Missing values become 0; each row is [load, tempc].
    rawData = df[["load", "tempc"]].fillna(0).values.tolist()
    del df

    # populate actual list
    actual = [float(row[0]) for row in rawData]

    (forecasted,
     MAPE) = loadForecast.rollingDylanForecast(rawData,
                                               float(inputDict["upBound"]),
                                               float(inputDict["lowBound"]))

    (exp, exp_MAPE) = loadForecast.exponentiallySmoothedForecast(
        rawData, float(inputDict["alpha"]), float(inputDict["beta"]))

    # parse json params for nextDayPeakKatrina; invalid JSON falls back to {}
    try:
        params = json.loads(inputDict.get("katSpec", "{}"))
    except ValueError:
        params = {}

    pred_demand = loadForecast.nextDayPeakKatrinaForecast(
        rawData, inputDict["simStartDate"], modelDir, params)
    pred_demand = np.transpose(np.array(pred_demand)).tolist()

    # The prophet forecast only runs when more than one partition is requested.
    prophet_partitions = int(inputDict.get("prophet", 0))
    if prophet_partitions > 1:
        prophet, prophet_low, prophet_high = loadForecast.prophetForecast(
            rawData, inputDict["simStartDate"], modelDir, inputDict["prophet"])

    # write our outData
    outData["startDate"] = inputDict["simStartDate"]
    outData["actual"] = actual
    outData["forecasted"] = forecasted
    outData["doubleExp"] = exp
    outData["neuralNet"] = nn_pred
    outData["MAPE"] = "%0.2f%%" % (MAPE * 100)
    outData["MAPE_exp"] = "%0.2f%%" % (exp_MAPE * 100)
    outData["MAPE_nn"] = "%0.2f%%" % nn_accuracy["test"]
    outData["peakDemand"] = pred_demand
    if prophet_partitions > 1:
        outData["prophet"] = prophet
        outData["prophetLow"] = prophet_low
        outData["prophetHigh"] = prophet_high
    return outData

Exemplo n.º 4

0

Exibir arquivo

Arquivo: storagePeakShave.py Projeto: akbarnes/omf

def forecastWork(modelDir, ind):
    ''' Run the model in its directory.

    Writes ind['histCurve'] to hist.csv inside modelDir, predicts load with
    fc.neural_net_predictions (optionally starting from an uploaded
    base64-encoded Keras model), runs pulp24hrBattery for every day, and
    returns a dict of accuracy and financial outputs.

    Args:
        modelDir: directory where hist.csv and the model files are written.
        ind: input dict; the keys read here are cellCapacity, dischargeRate,
            chargeRate, cellQuantity, cellCost, demandCharge, retailCost,
            batteryEfficiency, dodFactor, projYears, batteryCycleLife,
            histCurve, newModel, model (when newModel is not 'True'),
            epochs and discountRate.

    Returns:
        dict of model outputs (the `o` built below).

    Raises:
        Exception: if the historical CSV cannot be written or parsed, or
            has fewer than 26280 rows.
    '''
    # Function-scope imports: tensorflow was already imported here; base64
    # replaces the Python-2-only str.decode('base64') codec.
    import base64
    import tensorflow as tf

    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
     [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])

    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = float(ind['batteryCycleLife'])
    battCapacity = cellQuantity * cellCapacity * dodFactor

    o = {}

    try:
        histPath = pJoin(modelDir, 'hist.csv')
        with open(histPath, 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(histPath)
        # Explicit check (not an assert) so validation still runs under -O.
        # 26280 hourly rows == 3 years.
        if df.shape[0] < 26280:
            raise ValueError('fewer than 3 years of data')
        if df.shape[1] == 6:
            # Six columns: build timestamps from year/month/day/hour.
            df['dates'] = df.apply(lambda x: dt(int(x['year']), int(x[
                'month']), int(x['day']), int(x['hour'])),
                                   axis=1)
        else:
            # Otherwise re-read with the 'dates' column parsed as datetimes.
            df = pd.read_csv(histPath, parse_dates=['dates'])
            df['month'] = df.dates.dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
    except Exception:
        raise Exception("CSV file is incorrect format.")

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    if ind['newModel'] == 'True':
        model = None
    else:
        # The uploaded model arrives base64-encoded; write it to disk so
        # Keras can load it.
        modelPath = pJoin(modelDir, 'neural_net.h5')
        with open(modelPath, 'wb') as f:
            f.write(base64.b64decode(ind['model']))
        model = tf.keras.models.load_model(modelPath)
    predictions, accuracy = fc.neural_net_predictions(
        all_X,
        all_y,
        epochs=int(ind['epochs']),
        model=model,
        save_file=pJoin(modelDir, 'neural_net_model.h5'))

    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    weather = df['tempc'][-8760:]
    dailyWeatherPredictions = [
        weather[i:i + 24] for i in range(0, len(weather), 24)
    ]

    # decide to implement VBAT every day for a year
    # The weather slices are unused by the dispatch itself; zipping against
    # them keeps the original cap on the number of days processed.
    VB_power, VB_energy = [], []
    for load24, _ in zip(dailyLoadPredictions, dailyWeatherPredictions):
        vbp, vbe = pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                   cellCapacity * cellQuantity, battEff)
        VB_power.extend(vbp)
        VB_energy.extend(vbe)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 100 - round(accuracy['train'], 1)
    o['testAccuracy'] = 100 - round(accuracy['test'], 1)
    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    # Calculate monthHours: [start, end) hour offsets of each month within
    # the final 8760 rows.
    lastYear = df[-8760:].copy()
    lastYear.reset_index(inplace=True)
    lastYear['hour'] = list(lastYear.index)
    start = list(lastYear.groupby('month').first()['hour'])
    finish = list(lastYear.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(df['load'][-8760:])
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:f]) for s, f in monthHours]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    # State of charge as a percentage of the usable (dodFactor-scaled) capacity.
    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]
    # Sum every drop in SoC and convert from percent to full-cycle equivalents.
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1]
        for i in range(len(SoC) - 1) if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    # NOTE(review): the "+ 10" here and in LCOE below looks like a guard
    # against division by zero, but it also shifts the result - confirm intent.
    o['batteryLife'] = batteryCycleLife / (cycleEquivalents + 10)

    # Cash Flow Graph
    # Year 0 is the initial investment; every projected year then earns the
    # summed monthly peak reduction times the demand charge.
    annualBenefit = sum(o['ps']) * demandCharge
    cashFlowCurve = [annualBenefit for _ in range(projYears)]
    cashFlowCurve.insert(0, -1 * cellCost * cellQuantity)
    o['SPP'] = (cellCost * cellQuantity) / annualBenefit
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i in range(len(cashFlowCurve))
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)

    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = lcoeTotCost / (cycleEquivalents * battCapacity + 10)

    # Other
    o['startDate'] = '2011-01-01'
    o['stderr'] = ''
    o['stdout'] = 'Success'

    return o

Exemplo n.º 5

0

Exibir arquivo

Arquivo: forecastTool.py Projeto: sammatuba/omf

def work(modelDir, ind):
	''' Model processing done here.

	Writes ind['histCurve'] to hist.csv inside modelDir, forecasts the next
	one to three days of load with lf.neural_net_next_day, and builds the
	output dict (peaks, accuracy figures, chart series, peak likelihood).
	The trained models are base64-encoded and written back into `ind`, so
	the caller's dict is mutated.

	Args:
		modelDir: directory where hist.csv and the model files live.
		ind: input dict; the keys read here are epochs, histCurve,
			tempCurve, newModel and, when newModel is 'False', the
			one/two/three_day_model and *_filename entries.

	Returns:
		dict of model outputs (the `o` built below).

	Raises:
		Exception: if the load CSV cannot be read or has fewer than 26280
			rows, or if tempCurve does not hold exactly 72 numbers.
	'''
	epochs = int(ind['epochs'])
	o = {}  # See bottom of file for out's structure

	try:
		histPath = pJoin(modelDir, 'hist.csv')
		with open(histPath, 'w') as f:
			f.write(ind['histCurve'].replace('\r', ''))
		df = pd.read_csv(histPath)
		# Explicit check (not an assert) so validation still runs under -O.
		if df.shape[0] < 26280:
			raise ValueError('At least 3 years of data is required')

		if 'dates' not in df.columns:
			# Build timestamps from the separate year/month/day/hour columns.
			df['dates'] = df.apply(
				lambda x: dt(
					int(x['year']),
					int(x['month']),
					int(x['day']),
					int(x['hour'])),
				axis=1
			)
		else:
			# read_csv leaves an existing 'dates' column as strings; the
			# .dt accessor used below needs real datetimes.
			df['dates'] = pd.to_datetime(df['dates'])
	except Exception:
		raise Exception("Load CSV file is incorrect format.")

	try:
		weather = [float(i) for i in ind['tempCurve'].split('\n') if i != '']
		# 72 hourly temperatures == three days of weather.
		if len(weather) != 72:
			raise ValueError("weather csv in wrong format")
	except Exception:
		raise Exception(ind['tempCurve'])

	# ---------------------- MAKE PREDICTIONS ------------------------------- #

	df = df.sort_values('dates')
	# df = autofill(df)
	# Keep only the days that have exactly 24 rows.
	hoursPerDay = dict(df.groupby(df.dates.dt.date)['dates'].count())
	df = df[df['dates'].dt.date.apply(lambda x: hoursPerDay[x] == 24)]

	df, tomorrow = lf.add_day(df, weather[:24])
	all_X, all_y = lf.makeUsefulDf(df, structure="3D")

	if ind['newModel'] == 'False':
		# Restore the previously trained models that were uploaded as base64.
		for day in ['one_day_model', 'two_day_model', 'three_day_model']:
			with open(pJoin(modelDir, ind[day+'_filename']), 'wb') as f:
				f.write(base64.standard_b64decode(ind[day]))

	def savedModel(filenameKey):
		# None makes the forecaster train from scratch; otherwise the
		# restored model is loaded.
		if ind['newModel'] == 'True':
			return None
		return tf.keras.models.load_model(pJoin(modelDir, ind[filenameKey]))

	tomorrow_load, _, tomorrow_accuracy = lf.neural_net_next_day(
		all_X, all_y,
		epochs=epochs, save_file=pJoin(modelDir, 'one_day_model.h5'),
		model=savedModel('one_day_model_filename'),
		structure="3D"
	)

	o['tomorrow_load'] = tomorrow_load
	o['month_start'] = dt(tomorrow.year, tomorrow.month, 1).strftime("%A, %B %-d, %Y")
	o['forecast_start'] = tomorrow.strftime("%A, %B %-d, %Y")

	# Defaults for days that fall in the next month and are not forecast.
	two_day_peak = 0
	two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
	two_day_predicted_load = []
	three_day_peak = 0
	three_day_load_accuracy = {'test': np.nan, 'train': np.nan}
	three_day_predicted_load = []

	# second day
	df, second_day = lf.add_day(df, weather[24:48])
	if second_day.month == tomorrow.month:
		all_X, all_y = lf.makeUsefulDf(df, hours_prior=48, noise=5, structure="3D")
		two_day_predicted_load, _, two_day_load_accuracy = lf.neural_net_next_day(
			all_X, all_y,
			epochs=epochs, hours_prior=48,
			save_file=pJoin(modelDir, 'two_day_model.h5'),
			model=savedModel('two_day_model_filename'),
			structure="3D"
		)
		two_day_peak = max(two_day_predicted_load)

		# third day
		df, third_day = lf.add_day(df, weather[48:72])
		if third_day.month == tomorrow.month:
			all_X, all_y = lf.makeUsefulDf(df, hours_prior=72, noise=15, structure="3D")
			three_day_predicted_load, _, three_day_load_accuracy = lf.neural_net_next_day(
				all_X, all_y,
				epochs=epochs, hours_prior=72,
				save_file=pJoin(modelDir, 'three_day_model.h5'),
				model=savedModel('three_day_model_filename'),
				structure="3D"
			)
			three_day_peak = max(three_day_predicted_load)

	tomorrow_peak = max(tomorrow_load)
	# Hourly rows for the same calendar month in every other year.
	sameMonthPast = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year)]
	dailyPeaks = sameMonthPast.groupby(sameMonthPast.dates.dt.date)['load'].max()
	# Percentage of those past daily peaks that lie below tomorrow's peak.
	o['quantile'] = round(dailyPeaks[dailyPeaks < tomorrow_peak].shape[0]/float(dailyPeaks.shape[0])*100, 2)
	o['predicted_peak'] = [dailyPeaks.median(), highest_peak_this_month(df, tomorrow), tomorrow_peak, two_day_peak, three_day_peak]
	o['predicted_peak_limits'] = [
		[dailyPeaks.min(), dailyPeaks.max()],
		[0, 0],
		[tomorrow_peak*(1 + tomorrow_accuracy['test']*.01), tomorrow_peak*(1 - tomorrow_accuracy['test']*.01)],
		[two_day_peak*(1 + two_day_load_accuracy['test']*.01), two_day_peak*(1 - two_day_load_accuracy['test']*.01)],
		[three_day_peak*(1 + three_day_load_accuracy['test']*.01), three_day_peak*(1 - three_day_load_accuracy['test']*.01)]
	]

	# ---------------------- FORMAT FOR DISPLAY ------------------------------- #
	# One faint grey line per previous year of the same month.
	l = []
	for y in sameMonthPast.year.unique():
		l.append({
			'name': y.item(),
			'color': 'lightgrey',
			'data': sameMonthPast[sameMonthPast['year'] == y]['load'].tolist(),
			'type': 'line',
			'opacity': .05,
			'enableMouseTracking': False
		})

	all_load = tomorrow_load + two_day_predicted_load + three_day_predicted_load
	load_leading_up = df[(df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)]['load'].tolist()
	# Placeholder entries that push the forecast series past the actual load.
	padding = [None]*(len(load_leading_up) - 72)
	l.append({'name': tomorrow.year, 'color': 'black', 'data': load_leading_up[:-72], 'type': 'line'})
	l.append({'name':'forecast','color':'blue', 'data': padding + all_load, 'type': 'line', 'zIndex': 5 })

	# add uncertainty
	uncertainty = [2.02, 2.41, 2.78, 2.91, 3.48, 4.02, 4.2, 3.96, 3.63, 3.68, 4.19, 4.45, 4.77, 4.94, 4.79, 5.22, 5.58, 5.32, 5.44, 4.85, 5.05, 5.51, 5.71, 5.96, 7.84, 8.44, 8.96, 9.06, 8.81, 8.53, 8.4, 8.06, 7.33, 6.5, 6.15, 6.23, 6.43, 6.34, 6.84, 6.76, 7.17, 7.2, 6.93, 6.83, 6.71, 7.39, 8.49, 9.24, 9.36, 10.64, 9.95, 9.4, 9.6, 9.28, 8.52, 8.78, 8.71, 8.59, 8.34, 8.81, 9.12, 9.53, 10.3, 10.67, 10.89, 10.47, 9.67, 8.95, 8.79, 9.18, 9.92, 10.25]
	print(tomorrow_accuracy['test'])
	l.append({
		'name': 'uncertainty',
		'color': '#b3b3ff',
		'data': padding + [x*u*.01*2 for u, x in zip(uncertainty, all_load)],
	})

	l.append({
		'id': 'transparent',
		'color': 'rgba(255,255,255,0)',
		'data': padding + [x*(1-u*.01) for u, x in zip(uncertainty, all_load)]
	})

	o['previous_months'] = l

	o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
	o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
	o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
	o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)

	o['peak_percent_chance'] = peak_likelihood(
		hist=highest_peak_this_month(df[:-48], tomorrow),
		tomorrow=tomorrow_peak,
		tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
		two_day=two_day_peak,
		two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
		three_day=three_day_peak,
		three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
	)

	o['stderr'] = ''

	def encodedModel(filename):
		# Read a saved model file and return it as base64 text.
		with open(pJoin(modelDir, filename), 'rb') as f:
			return base64.standard_b64encode(f.read()).decode()

	# NOTE(review): presumes all three .h5 files exist; the two- and
	# three-day save_file paths are only passed in the same-month branches
	# above - confirm behaviour at month boundaries.
	one_day_model = encodedModel('one_day_model.h5')
	two_day_model = encodedModel('two_day_model.h5')
	three_day_model = encodedModel('three_day_model.h5')

	# re-input values (i.e. modify the mutable dictionary that is used in heavyprocessing!!!!!!)
	# No trailing commas here: a trailing comma stores a 1-tuple instead of
	# the string, which breaks the newModel == 'False' path on the next run.
	ind['newModel'] = 'False'
	ind['one_day_model'] = one_day_model
	ind['one_day_model_filename'] = 'one_day_model.h5'
	ind['two_day_model'] = two_day_model
	ind['two_day_model_filename'] = 'two_day_model.h5'
	ind['three_day_model'] = three_day_model
	ind['three_day_model_filename'] = 'three_day_model.h5'

	return o

Exemplo n.º 6

0

Exibir arquivo

def work(modelDir, ind):
    ''' Run the model in its directory.

    Writes the submitted load history to ``hist.csv`` inside modelDir, fits
    an SGD linear regressor on everything except the final 8760 rows,
    predicts those final 8760 rows, decides day by day whether to dispatch
    the battery, and then derives accuracy and financial figures.

    Args:
        modelDir: directory in which ``hist.csv`` is written and read back.
        ind: mapping of input values. Keys read here: cellCapacity,
            dischargeRate, chargeRate, cellQuantity, cellCost,
            batteryEfficiency, projectionLength, historicalData, goal,
            transformerThreshold, confidence, electricityCost,
            demandChargeCost and discountRate.

    Returns:
        dict of outputs (predictions, accuracy metrics, dispatch series,
        monthly costs, cash flows, NPV/SPP and status strings).

    Raises:
        Exception: "CSV file is incorrect format." when the history cannot
            be written, parsed, or does not have the expected shape.
    '''
    # chargeRate is parsed (and therefore validated) but not used below.
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
        [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    battEff = float(ind.get("batteryEfficiency")) / 100.0
    projYears = int(ind.get('projectionLength'))

    o = {}

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(ind['historicalData'])
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        # Explicit checks instead of assert: asserts vanish under `python -O`.
        if df.shape[0] < 26280:  # must be longer than 3 years
            raise ValueError('history shorter than 26280 rows')
        if df.shape[1] != 5:  # CSV columns plus the two derived ones above
            raise ValueError('unexpected number of columns')
    except Exception:
        # The original caught ZeroDivisionError, which this code never raises,
        # so the friendly message below was unreachable.
        raise Exception("CSV file is incorrect format.")

    # retrieve goal
    goal = ind['goal']
    threshold = float(ind['transformerThreshold']) * 1000
    confidence = float(ind['confidence']) / 100

    # train model on everything except the final 8760 rows
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]

    # Split the predictions into consecutive 24-value chunks (365 of them).
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, load24 in enumerate(dailyLoadPredictions):
        peak = max(load24)
        if fc.shouldDispatchDeferral(peak, df, confidence, threshold):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #

    o['predictedLoad'] = list(predictions)
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    # For each calendar month, the dayOfYear of the single highest load row.
    maxDays = []
    for monthNumber in range(1, 13):
        monthRows = df[df['month'] == monthNumber]
        maxDays.append(monthRows.loc[monthRows['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        # dayOfYear is 1-based (1..366) while the list is 0-based with 365
        # slots; convert and ignore day 366 rather than raising IndexError.
        # NOTE(review): lining this up with `dispatched` presumes the final
        # 8760 rows begin on January 1 -- confirm against real inputs.
        dayIndex = int(day) - 1
        if 0 <= dayIndex < len(shouldHaveDispatched):
            shouldHaveDispatched[dayIndex] = True

    outcomes = list(zip(dispatched, shouldHaveDispatched))
    truePositive = sum(1 for did, should in outcomes if did and should)
    falsePositive = sum(1 for did, should in outcomes if did and not should)
    falseNegative = sum(1 for did, should in outcomes if should and not did)

    # Guard the ratios: with no dispatches (or no positives) the original
    # expressions divided by zero.
    predictedPositives = truePositive + falsePositive
    actualPositives = truePositive + falseNegative
    o['precision'] = round(
        truePositive / float(predictedPositives) * 100, 2) if predictedPositives else 0.0
    o['recall'] = round(
        truePositive / float(actualPositives) * 100, 2) if actualPositives else 0.0
    o['number_of_dispatches'] = len([flag for flag in dispatched if flag])

    demand = list(y_test)
    # Mean absolute percentage error over the 8760 predicted values.
    o['MAE'] = round(
        sum([abs(pred - actual) / actual * 100
             for pred, actual in zip(predictions, demand)]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    o['VBpower'], o['VBenergy'] = list(VB_power), list(VB_energy)

    # Calculate monthHours: (start, end) positions of each month within the
    # final 8760 rows, end exclusive.
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    energyMonthly = [sum(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:f]) for s, f in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:f]) for s, f in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    initInvestment = cellCost * cellQuantity
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    annualEarnings = sum(o['savings'])  # - something!
    cashFlowList = [annualEarnings] * projYears
    cashFlowList.insert(0, -1 * initInvestment)

    # np.npv was removed in NumPy 1.20; this is the same formula it used:
    # sum(values[t] / (1 + rate) ** t) with t starting at 0.
    discountRate = float(ind['discountRate']) / 100
    o['NPV'] = sum(
        cash / (1 + discountRate) ** period
        for period, cash in enumerate(cashFlowList))
    o['SPP'] = initInvestment / annualEarnings
    o['netCashflow'] = cashFlowList

    # Running total, accumulated in one pass.
    cumulativeCashflow = []
    runningTotal = 0
    for cash in cashFlowList:
        runningTotal += cash
        cumulativeCashflow.append(runningTotal)
    o['cumulativeCashflow'] = cumulativeCashflow

    # Warn when any adjusted demand value is above the threshold. The
    # original tested `threshold > demand`, i.e. the opposite condition.
    o['dataCheck'] = 'Threshold exceeded' if any(
        d > threshold for d in demandAdj) and goal == 'deferral' else ''
    o['transformerThreshold'] = threshold if goal == 'deferral' else None

    o['stdout'] = 'Success'
    return o

Exemplo n.º 7

0

Exibir arquivo

def _long_date(d):
	''' Format d as e.g. "Monday, January 5, 2020" with an unpadded day.

	Equivalent to strftime("%A, %B %-d, %Y"), but avoids the "%-d"
	directive, which is a platform extension and not available everywhere.
	'''
	return d.strftime("%A, %B ") + str(d.day) + d.strftime(", %Y")


def work(modelDir, ind):
	''' Model processing done here.

	Writes the submitted load history to hist.csv in modelDir, appends up to
	three forecast days using the 72 supplied temperatures, trains one neural
	net per forecast horizon via lf.neural_net_next_day, and assembles the
	peak statistics and chart series returned to the caller.

	Args:
		modelDir: directory used for hist.csv and the saved .h5 model files.
		ind: mapping of inputs; keys read here are 'epochs', 'histCurve'
			and 'tempCurve'.

	Returns:
		dict of outputs (forecast load, peak figures and limits, chart
		series, accuracy figures, peak likelihood and 'stderr').

	Raises:
		Exception: "Load CSV file is incorrect format." when the history
			cannot be written, parsed, or is shorter than 26280 rows.
		Exception: carrying the raw tempCurve text when it does not parse
			to exactly 72 numbers.
	'''
	epochs = int(ind['epochs'])
	o = {}  # See bottom of file for out's structure

	try:
		with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
			f.write(ind['histCurve'].replace('\r', ''))
		df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
		# Explicit check instead of assert: asserts vanish under `python -O`.
		if df.shape[0] < 26280:  # must be longer than 3 years
			raise ValueError('history shorter than 26280 rows')
		if 'dates' not in df.columns:
			# Build a datetime column from the separate date-part columns.
			df['dates'] = df.apply(
				lambda x: dt(
					int(x['year']),
					int(x['month']),
					int(x['day']),
					int(x['hour'])),
				axis=1
			)
	except Exception:
		raise Exception("Load CSV file is incorrect format.")

	try:
		# Skip blank lines so a trailing newline does not break float('').
		weather = [float(i) for i in ind['tempCurve'].split('\n') if i.strip()]
		if len(weather) != 72:
			raise ValueError("weather csv in wrong format")
	except Exception:
		raise Exception(ind['tempCurve'])

	# ---------------------- MAKE PREDICTIONS ------------------------------- #
	df, tomorrow = lf.add_day(df, weather[:24])
	all_X = lf.makeUsefulDf(df)
	all_y = df['load']

	# load prediction (the returned model object itself is not used here)
	tomorrow_load, _, tomorrow_accuracy = lf.neural_net_next_day(
		all_X, all_y, epochs=epochs,
		save_file=pJoin(modelDir, 'neural_net_1day.h5'))
	o['tomorrow_load'] = tomorrow_load
	o['month_start'] = _long_date(dt(tomorrow.year, tomorrow.month, 1))
	o['forecast_start'] = _long_date(tomorrow)

	# Defaults for horizons that fall outside tomorrow's month; overwritten
	# below whenever the corresponding day is actually forecast.
	two_day_peak = 0
	two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
	three_day_peak = 0
	three_day_load_accuracy = {'test': np.nan, 'train': np.nan}

	# second day
	df, second_day = lf.add_day(df, weather[24:48])
	if second_day.month == tomorrow.month:
		all_X = lf.makeUsefulDf(df, hours_prior=48, noise=5)
		all_y = df['load']
		two_day_predicted_load, _, two_day_load_accuracy = lf.neural_net_next_day(
			all_X, all_y, epochs=epochs, hours_prior=48,
			save_file=pJoin(modelDir, 'neural_net_2day.h5'))
		two_day_peak = max(two_day_predicted_load)

		# third day (only attempted when the second day is in the same month)
		df, third_day = lf.add_day(df, weather[48:72])
		if third_day.month == tomorrow.month:
			all_X = lf.makeUsefulDf(df, hours_prior=72, noise=15)
			all_y = df['load']
			three_day_predicted_load, _, three_day_load_accuracy = lf.neural_net_next_day(
				all_X, all_y, epochs=epochs, hours_prior=72,
				save_file=pJoin(modelDir, 'neural_net_3day.h5'))
			three_day_peak = max(three_day_predicted_load)

	tomorrow_peak = max(tomorrow_load)
	# Rows from the same calendar month in every other year.
	same_month = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year)]
	o['quantile'] = round(
		same_month[same_month['load'] < tomorrow_peak].shape[0] / float(same_month.shape[0]) * 100, 2)
	o['predicted_peak'] = [
		same_month['load'].median(),
		highest_peak_this_month(df, tomorrow),
		tomorrow_peak,
		two_day_peak,
		three_day_peak
	]
	o['predicted_peak_limits'] = [
		[same_month['load'].min(), same_month['load'].max()],
		[0, 0],
		[tomorrow_peak*(1 + tomorrow_accuracy['test']*.01), tomorrow_peak*(1 - tomorrow_accuracy['test']*.01)],
		[two_day_peak*(1 + two_day_load_accuracy['test']*.01), two_day_peak*(1 - two_day_load_accuracy['test']*.01)],
		[three_day_peak*(1 + three_day_load_accuracy['test']*.01), three_day_peak*(1 - three_day_load_accuracy['test']*.01)]
	]

	previous_months = [{
		'year': y,
		'load': same_month[same_month['year'] == y]['load'].tolist()
	} for y in same_month['year'].unique()]

	# hard-code the input for highcharts
	o['cats_pred'] = list(range(744)) ### FIX THIS

	# One faint grey line per previous year, then this year's data and the
	# forecast on top.
	series = [{
		'name': d['year'],
		'color': 'lightgrey',
		'data': d['load'],
		'type': 'line',
		'opacity': .05,
		'enableMouseTracking': False
	} for d in previous_months]

	load_leading_up = df[(df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)]['load'].tolist()
	# NOTE(review): the fixed 72 offset presumes all three forecast days were
	# appended within tomorrow's month -- confirm behaviour near month end.
	series.append({'name': tomorrow.year, 'color': 'black', 'data': load_leading_up[:-72], 'type': 'line'})
	series.append({'name':'forecast','color':'blue','data': [None]*(len(load_leading_up) - 72) + o['tomorrow_load'],'type': 'line'})

	o['previous_months'] = series

	o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
	o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
	o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
	o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)

	# NOTE(review): df[:-48] presumably drops the two later forecast days so
	# only history up to tomorrow is considered -- confirm against lf.add_day.
	o['peak_percent_chance'] = peak_likelihood(
		hist=highest_peak_this_month(df[:-48], tomorrow),
		tomorrow=tomorrow_peak,
		tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
		two_day=two_day_peak,
		two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
		three_day=three_day_peak,
		three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
	)

	o['stderr'] = ''

	return o