Exemplo n.º 1
0
def predict_done():
    """Train an LSTM regressor on the PT electricity-price CSV and plot predictions.

    Relies on module-level names defined elsewhere in this file:
    load_csvdata, lstm_model, SKCompat, learn, pd, plt, mean_absolute_error.
    """
    LOG_DIR = './ops_logs'
    TIMESTEPS = 10
    RNN_LAYERS = [{'num_units': TIMESTEPS}]
    DENSE_LAYERS = [10, 10]
    TRAINING_STEPS = 1000
    BATCH_SIZE = 100
    # BUG FIX: `/` produces a float (10.0); ValidationMonitor's every_n_steps
    # expects an integer, so use floor division.
    PRINT_STEPS = TRAINING_STEPS // 100

    # Combine the 'date' and '(UTC)' columns into a single datetime index.
    dateparse = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y %H:%M')
    rawdata = pd.read_csv(
        "./model/input/ElectricityPrice/RealMarketPriceDataPT.csv",
        parse_dates={'timeline': ['date', '(UTC)']},
        index_col='timeline',
        date_parser=dateparse)

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    regressor = SKCompat(
        learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                            DENSE_LAYERS)))

    # Evaluate on the validation split during training; stop early after
    # 1000 rounds without improvement.
    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'],
        y['val'],
        every_n_steps=PRINT_STEPS,
        early_stopping_rounds=1000)

    # `regressor` is already an SKCompat wrapper; the original wrapped the
    # return value of fit() in SKCompat(...) again, which had no effect.
    regressor.fit(X['train'],
                  y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    predicted = regressor.predict(X['test'])
    # This is the mean *absolute* error (the original mislabeled it `mse`
    # and then discarded it); report it so the computation is not wasted.
    mae = mean_absolute_error(y['test'], predicted)
    print("MAE: %f" % mae)

    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted])

    plt.show()
Exemplo n.º 2
0
# NOTE(review): TRAINING_STEPS is defined outside this snippet — confirm it is
# in scope before this line runs.
BATCH_SIZE = 100  # mini-batch size used by the estimator below
PRINT_STEPS = TRAINING_STEPS / 100  # log every 1% of training (float; cast to int if an API requires it)

# ## Parameter definitions
#
# - LOG_DIR: log file directory
# - TIMESTEPS: number of RNN time steps
# - RNN_LAYERS: RNN layer configuration
# - DENSE_LAYERS: DNN sizes; [10, 10] means two dense layers with 10 hidden units each
# - TRAINING_STEPS: number of training steps
# - BATCH_SIZE: mini-batch size for training
# - PRINT_STEPS: interval for intermediate progress output (prints every 1% of total steps)

# In[15]:

# Build a TF Learn regressor around the LSTM model function.
# n_classes=0 selects regression (values >= 1 would mean classification).
regressor = learn.TensorFlowEstimator(model_fn=lstm_model(
    TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=1,
                                      steps=TRAINING_STEPS,
                                      optimizer='Adagrad',
                                      learning_rate=0.03,
                                      batch_size=BATCH_SIZE)

# ## Create a regressor with TF Learn
#
# : Build the model used for prediction, using the TensorFlowEstimator
#   provided by the TF Learn library.
#
# **Parameters**:
#
# - model_fn: the model used for training and prediction
# - n_classes: number of label classes (0: regression/prediction, >= 1: classification) — needs verification
Exemplo n.º 3
0
from sklearn.metrics import mean_squared_error

from lstm_predictor import generate_data, lstm_model

# Silence library deprecation warnings for this example script.
warnings.filterwarnings("ignore")

LOG_DIR = 'resources/logs/'  # TensorFlow log directory (unused below; model_dir line is commented out)
TIMESTEPS = 1  # RNN time steps per training example
RNN_LAYERS = [{'num_units': 400}]  # single LSTM layer with 400 units
DENSE_LAYERS = None  # no dense layers after the RNN
TRAINING_STEPS = 500
PRINT_STEPS = TRAINING_STEPS  # / 10
BATCH_SIZE = 100

# Wrap the contrib.learn Estimator so it exposes a scikit-learn style API.
regressor = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                        DENSE_LAYERS), ))
#   model_dir=LOG_DIR)

# Synthetic data: a sampled sine wave split into train/val/test windows.
X, y = generate_data(np.sin,
                     np.linspace(0, 100, 10000, dtype=np.float32),
                     TIMESTEPS,
                     seperate=False)

# Gaussian noise, one matrix per split. NOTE(review): only noise_train is
# transposed below and none of the noise is ever added to y here —
# presumably leftover experiment code; confirm before relying on it.
noise_train = np.asmatrix(np.random.normal(0, 0.2, len(y['train'])),
                          dtype=np.float32)
noise_val = np.asmatrix(np.random.normal(0, 0.2, len(y['val'])),
                        dtype=np.float32)
noise_test = np.asmatrix(np.random.normal(0, 0.2, len(y['test'])),
                         dtype=np.float32)  #asmatrix

noise_train = np.transpose(noise_train)
# Parse the 'date' and '(UTC)' columns as a single datetime index.
dateparse = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y %H:%M')

rawdata =  pd.read_csv("./TotalLoad.csv",parse_dates={'timeline':['date', '(UTC)']},index_col='timeline', date_parser=dateparse)

# NOTE(review): this second read overwrites the date-parsed version above —
# it looks like a workaround for parsing errors; confirm which one is intended.
rawdata =  pd.read_csv("./TotalLoad.csv",index_col=['date', '(UTC)'])

# parsing errors in pandas


df = pd.read_csv("./TotalLoad.csv")
df  # bare expression: displays the DataFrame in a notebook; no effect in a script

X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

# NOTE(review): the two assignments below each rebind `regressor`, so only the
# second takes effect. TensorFlowEstimator is the deprecated API; plain
# learn.Estimator does not accept n_classes/verbose/steps/... kwargs — confirm.
regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),n_classes=0,verbose=1,steps=TRAINING_STEPS,optimizer='Adagrad',learning_rate=0.03, batch_size=BATCH_SIZE)

regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),n_classes=0,verbose=1,steps=TRAINING_STEPS,optimizer='Adagrad',learning_rate=0.03, batch_size=BATCH_SIZE)

# Reference: https://www.tensorflow.org/api_guides/python/contrib.learn

# Minimal construction: Estimator with only a model_fn (falls back to a
# temporary model directory, as the pasted warnings below show).
regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS))

WARNING:tensorflow:Using temporary folder as model directory: C:\Users\aroug\AppData\Local\Temp\tmp3nwb8sg9
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\aroug\AppData\Local\Temp\tmp3nwb8sg9
INFO:tensorflow:Using default config.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_task_type': None, '_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_environment': 'local', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000014792A352E8>, '_save_checkpoints_secs': 600, '_task_id': 0, '_keep_checkpoint_ev
Exemplo n.º 5
0
def learningpercell(cell_name):
    """Train an LSTM regressor on counter values for one cell and plot results.

    Walks the module-level `working_dir` for pipe-delimited CSV files, keeps
    rows whose 'cellname' contains *cell_name*, trains on the concatenated
    'value' series, then prints RMSE/NMSE/MSE and plots the test series
    against the predictions.

    Relies on module-level names defined elsewhere in this file:
    load_csvdata, lstm_model, skflow, learn, pd, np, plt, os, warnings,
    mean_squared_error, working_dir.
    """
    warnings.filterwarnings("ignore")

    LOG_DIR = 'resources/logs/'
    TIMESTEPS = 100
    RNN_LAYERS = [{'num_units': 256}, {'num_units': 256}]
    DENSE_LAYERS = [32]
    TRAINING_STEPS = 100
    PRINT_STEPS = TRAINING_STEPS  # / 10
    BATCH_SIZE = 64

    # NOTE(review): `skflow` is used as a callable wrapper here (SKCompat-like);
    # confirm it is aliased accordingly at module level.
    regressor = skflow(
        learn.Estimator(
            model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))
    #   model_dir=LOG_DIR)

    # Timestamps look like 201512200045 -> '%Y%m%d%H%M'.
    dateparse = lambda dates: pd.datetime.strptime(dates, '%Y%m%d%H%M')
    rawdata = None
    df_list = []
    for root, dirs, files in os.walk(working_dir):
        csv_paths = [
            os.path.join(root, filename)
            for filename in files if filename.endswith('.csv')
        ]
        for file in csv_paths:
            df = pd.read_csv(
                file,
                delimiter="|",
                usecols=[1, 2, 5, 6, 7],
                header=None,
                na_values=["NIL"],
                na_filter=True,
                names=["meas_info", "counter", "cellname", "value", "time"],
                index_col='time')
            # Keep only rows belonging to the requested cell.
            df = df[df["cellname"].str.contains(cell_name)]
            if not df.empty:
                df_list.append(df[["value"]])

    if df_list:
        rawdata = pd.concat(df_list)

    # BUG FIX: the original called len(rawdata) unconditionally, raising
    # TypeError when no file matched (rawdata stayed None, and the
    # else/return guard after the concat was commented out).
    if rawdata is None or len(rawdata) == 0:
        print("Den ftanni")
        return
    print(len(rawdata))

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    print('-----------------------------------------')
    print('train y shape', y['train'].shape)
    print('train y shape_num', y['train'][1:5])

    # Validation monitor evaluated while fitting.
    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'],
        y['val'],
    )
    # every_n_steps=PRINT_STEPS,)
    # early_stopping_rounds=1000)

    # `regressor` is already wrapped; the original wrapped the return value of
    # fit() in skflow(...) again, which had no effect.
    regressor.fit(X['train'],
                  y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    print('X train shape', X['train'].shape)
    print('y train shape', y['train'].shape)

    print('X test shape', X['test'].shape)
    print('y test shape', y['test'].shape)
    predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)
    predicted = np.transpose(predicted)

    # Arrays (not matrices) are needed here so ** and mean() act elementwise;
    # a plain (predicted - y['test']) ** 2 on matrices was incorrect.
    rmse = np.sqrt((np.asarray((np.subtract(predicted, y['test'])))**2).mean())

    score = mean_squared_error(predicted, y['test'])
    # Normalised MSE; should use the variance of the original data and not
    # data from the fitted model — worth double-checking.
    nmse = score / np.var(y['test'])

    print("RSME: %f" % rmse)
    print("NSME: %f" % nmse)
    print("MSE: %f" % score)

    plot_test, = plt.plot(y['test'], label='test')
    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted, plot_test])
    plt.show()