-
Notifications
You must be signed in to change notification settings - Fork 0
/
Multi3.py
97 lines (72 loc) · 2.93 KB
/
Multi3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 21 19:19:55 2018
@author: omar_
"""
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
from datetime import date
from datetime import datetime as dt
def agregarDia(dfFilt):
    """Return a copy of *dfFilt* with an extra ``Dia`` (ISO weekday) column.

    Each value of the ``Fecha`` column is parsed with the ``%d/%m/%Y``
    format and converted to its ISO weekday number (Monday = 1 ...
    Sunday = 7).

    Parameters
    ----------
    dfFilt : pandas.DataFrame
        Frame containing a ``Fecha`` column of ``dd/mm/YYYY`` strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index as *dfFilt* plus the
        ``Dia`` column appended as the last column.

    Raises
    ------
    KeyError
        If *dfFilt* has no ``Fecha`` column.
    ValueError
        If a ``Fecha`` value does not match the ``%d/%m/%Y`` format.
    """
    dia = [
        dt.strptime(fecha, '%d/%m/%Y').isoweekday()
        for fecha in dfFilt['Fecha']
    ]
    # Build the new column on the frame's own index: concat(axis=1) aligns
    # on index labels, so a fresh 0..n-1 index would misalign (and pad with
    # NaN) whenever dfFilt does not carry a default RangeIndex.
    # https://pandas.pydata.org/pandas-docs/stable/merging.html
    sdia = pd.Series(dia, index=dfFilt.index, name='Dia')
    return pd.concat([dfFilt, sdia], axis=1)
def agregarZona(dfFilt):
    """Return *dfFilt* sorted by zone with an extra integer ``Zona`` column.

    The rows are sorted by ``Zona de Carga`` and the index is reset to
    0..n-1. Every run of equal zone names then receives a consecutive
    number starting at 1, in sorted order.

    Parameters
    ----------
    dfFilt : pandas.DataFrame
        Frame containing a ``Zona de Carga`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame, sorted by ``Zona de Carga`` with a fresh index, plus
        the ``Zona`` column appended as the last column. An empty input
        yields an empty frame that still has the ``Zona`` column.

    Raises
    ------
    KeyError
        If *dfFilt* has no ``Zona de Carga`` column.
    """
    dfFilt = dfFilt.sort_values(by=['Zona de Carga'])
    dfFilt = dfFilt.reset_index(drop=True)

    valores = dfFilt['Zona de Carga'].tolist()
    zona = []  # codes that will become the new column
    n = 1
    # Guard the empty frame: there is no first row to seed `prev` from.
    prev = valores[0] if valores else None
    for valor in valores:
        if valor != prev:
            # The zone name changed, so move on to the next code.
            n += 1
            prev = valor
        zona.append(n)

    # https://pandas.pydata.org/pandas-docs/stable/merging.html
    # The explicit dtype keeps the column integer even when it is empty.
    szona = pd.Series(zona, name='Zona', dtype='int64')
    return pd.concat([dfFilt, szona], axis=1)
# Column names shared by both yearly datasets.
col_precio = 'Precio Zonal ($/MWh)'
cols_base = ['Fecha', 'Hora', 'Zona de Carga', col_precio]
cols_x = ['Hora', 'Dia', 'Zona']

# Load the raw data: 2017 is used for training, 2018 for testing.
df2017 = pd.read_csv("Datos2017.csv", engine='python')
df2018 = pd.read_csv("Datos2018.csv", engine='python')

# Keep the relevant columns, then derive the weekday and zone-code features.
# NOTE(review): zone codes are numbered separately for each year (by sorted
# zone name), so a code only means the same zone in both years if both files
# contain the same set of zones - confirm.
dfFilt17 = agregarZona(agregarDia(df2017[cols_base]))
dfFilt18 = agregarZona(agregarDia(df2018[cols_base]))

# Fold hour 25 into hour 24.
hora_fix = {'Hora': {25: 24}}
dfFilt17 = dfFilt17.replace(hora_fix)
dfFilt18 = dfFilt18.replace(hora_fix)

# Replace every price that is not strictly positive with that year's mean
# price (the mean is taken before the replacement).
mean17 = dfFilt17[col_precio].mean()
mean18 = dfFilt18[col_precio].mean()
dfFilt17[col_precio] = dfFilt17[col_precio].where(
    dfFilt17[col_precio] > 0, mean17)
dfFilt18[col_precio] = dfFilt18[col_precio].where(
    dfFilt18[col_precio] > 0, mean18)

# Feature matrices and single-column target frames.
df_x_train = dfFilt17[cols_x]
df_y_train = dfFilt17[[col_precio]]
df_x_test = dfFilt18[cols_x]
df_y_test = dfFilt18[[col_precio]]

# scikit-learn linear regression: fit on 2017, predict 2018.
regr = linear_model.LinearRegression()
regr.fit(df_x_train, df_y_train)
df_y_pred = regr.predict(df_x_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(df_y_test, df_y_pred))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(df_y_test, df_y_pred))

# statsmodels OLS on the same features. No constant column is added here,
# so this model is fitted on the three features only.
model = sm.OLS(df_y_train, df_x_train).fit()
predictions = model.predict(df_x_test)
summ = model.summary()

# Compare the observed and predicted prices: summary statistics first,
# then one plot per frame.
df_y_pred = pd.DataFrame(df_y_pred)
for marco in (df_y_test, df_y_pred):
    print(marco.describe())
for marco in (df_y_test, df_y_pred):
    marco.plot()