Example #1
from joblib import Parallel, delayed  # assumed import; may also come from sklearn.externals
from io_utils.GFeeder import GFeeder

def get_params():
    # Evaluate put_queue (defined elsewhere) for sliding-window sizes
    # 4..20 in parallel, one job per CPU core.
    dataFeeder = GFeeder()
    out = Parallel(n_jobs=-1)(delayed(put_queue)(n_input, dataFeeder)
                              for n_input in range(4, 21))
    return out
Example #2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils.SlidingWindowUtil import SlidingWindow
from io_utils.GFeeder import GFeeder
from estimators.NeuralFlow import NeuralFlowRegressor
from utils.GraphUtil import *
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
dat = pd.read_csv('../sample_610_10min.csv', index_col=0, parse_dates=True)
training_size = 3000
test_size = 600
gFeeder = GFeeder()
# MinMaxScaler expects a 2-D array, so select the column as a DataFrame
# and flatten the scaled result back to 1-D.
dat['cpu_rate'] = scaler.fit_transform(dat[['cpu_rate']]).ravel()
X_dat = np.array(list(SlidingWindow(dat.cpu_rate, sliding_number=4)))
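# SlidingWindow with sliding_number=4 presumably emits rows of 4
# consecutive scaled readings, so X_dat has shape (n_samples, 4).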

X_train = X_dat[:training_size]
y_train = np.array(dat.cpu_rate[:training_size].tolist()).reshape(-1, 1)
X_test = X_dat[training_size + 1:training_size + 1 + test_size]
y_test = np.array(dat.cpu_rate[training_size + 1:training_size + test_size +
                               1].tolist()).reshape(-1, 1)

nn = NeuralFlowRegressor(learning_rate=1E-03, hidden_nodes=np.array([55]))
nn.fit(X_train, y_train)
y_pred = nn.predict(X_test)
plot_figure(y_pred=y_pred, y_true=y_test, title="Neural Flow sliding window 4")
Example #3
# Experiment GABPNN
from estimators.GAEstimator import GAEstimator
from estimators.NeuralFlow import NeuralFlowRegressor
from io_utils.GFeeder import GFeeder
from utils.GraphUtil import *
from utils.initializer import *

# length of sliding windows for input
n_sliding_window = 2

#Getting Google cluster data
dataFeeder = GFeeder(skip_lists=1, normalize_space=True)
metric_type = [dataFeeder.CPU_UTIL]
# metric_type = [dataFeeder.MEM_USAGE]
# print metrics_types
# metrics_windows = {
#     dataFeeder.CPU_UTIL : 3,
#     dataFeeder.DISK_IO_TIME,
#     dataFeeder.DISK_SPACE,
#
# }

print("Getting data")
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    metrics=metric_type, n_sliding_window=n_sliding_window)
# Number of hiddens node (one hidden layer)
score_print = []
for i in range(10):
    score_list = {}
    for n_hidden in np.arange(120,300,step=10):
        # n_hidden = 200
        # Define neural shape
Example #4
from __init__ import *
from sklearn.metrics import mean_squared_error
from io_utils.GFeeder import GFeeder
from math import fabs
import skflow
from utils.GraphUtil import *
from io_utils.NumLoad import *
import matplotlib.pyplot as plt
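# Reload a previously trained skflow TensorFlowEstimator from disk; the
# float in the directory name is presumably the saved model's error score.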
model = skflow.TensorFlowEstimator.restore(
    "params/model_full_metric_0.00409917244599")

n_sliding_window = 2
skip_lists = 3
dataFeeder = GFeeder(skip_lists, normalize_space=True)
# dataFeederNormalize = GFeeder()
#
metric_type = [
    dataFeeder.CPU_UTIL, dataFeeder.DISK_IO_TIME, dataFeeder.DISK_SPACE,
    dataFeeder.MEM_USAGE
]

X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    metrics=metric_type, n_sliding_window=n_sliding_window)
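# Each feature row presumably stacks a 2-step window over all four metrics
# (8 inputs); the targets are the next values of the four metrics.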
# X_trainn,y_trainn,X_testn,y_testn = dataFeederNormalize.split_train_and_test(metrics=metrics_types,n_sliding_window=n_sliding_window)

# X_train,y_train,X_test,y_test = load_training_from_npz("data/gdata/data_training_origin.npz")
# ax = plt.subplot()
# X_trainn,y_trainn,X_testn,y_testn = load_training_from_npz("data/gdata/data_training.npz")
y_pred = model.predict(X_test)
# plot_metric_figure(y_pred=y_pred,y_test=y_test, metric_type=metric_type,title="GA Prediction")
for k, metric in enumerate(metric_type):
Example #5
# coding: utf-8

# In[1]:

from scipy import signal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from io_utils.GFeeder import GFeeder

# In[9]:
dataFeeder = GFeeder(split_size=5)
raw_data = dataFeeder.read()
n_row = raw_data.shape[0]
workload = raw_data[dataFeeder.CPU_UTIL]

# In[10]:

# compute the autocorrelation
# center the series at the origin (subtract the mean) before computing
n = len(workload)
variance = workload.var()
workload2 = workload - workload.mean()
r = np.correlate(workload2, workload2, mode='full')[-n:]
# assert np.allclose(r, np.array([(workload[:n-k]*workload[-(n-k):]).sum() for k in range(n)]))
result = r / (n * variance)
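# Dividing by n * variance gives the biased autocorrelation estimate; the
# commented-out line below is the unbiased variant (lag k divided by n - k).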
# result = r/(variance*(np.arange(n, 0, -1)))
# Rescale the lag axis by 144, presumably one day of 10-minute samples.
plt.plot(np.arange(n) / 144.0, result)
Example #6
from sklearn.grid_search import GridSearchCV

from estimators.ACOEstimator import ACOEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *

param_dicts = {
    "Q": np.arange(0.01, 0.1, step=0.01),
    "epsilon": np.arange(0.1, 1.0, step=0.05),
    "number_of_solutions": np.arange(30, 200)
}
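# Grid of ACO hyperparameters to search; number_of_solutions also sets the
# size of the solution archive constructed below.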
n_windows = 4
n_hidden = 10
# range_training = (-1,28919)
# range_test = (28919,-1)
# metric_types = ["cpu_util","disk_write_rate","disk_read_rate","network_"]
dataFeeder = GFeeder()
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    n_sliding_window=n_windows)

neural_shape = [
    n_windows * len(dataFeeder.metric_type), n_hidden,
    len(dataFeeder.metric_type)
]
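# Network shape: n_windows inputs per metric, one hidden layer of n_hidden
# nodes, and one output per metric.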
estimator = ACOEstimator()
archive_solution = construct_solution(estimator.number_of_solutions,
                                      neural_shape)
fit_param = {'neural_shape': neural_shape, "archive": archive_solution}
# estimator.fit(X,y,**fit_param)
gs = GridSearchCV(estimator,
                  param_grid=param_dicts,
                  n_jobs=-1,
Example #7
# Experiment GABPNN
import pandas as pd
import numpy as np
from estimators.GAEstimator import GAEstimator
from estimators.NeuralFlow import NeuralFlowRegressor
from estimators.OptimizerNNEstimator import OptimizerNNEstimator
from io_utils.GFeeder import GFeeder
# from utils.GraphUtil import *
# from utils.initializer import *

# length of sliding windows for input
n_sliding_window = 2

#Getting Google cluster data
dataFeeder = GFeeder(skip_lists=1)
metrics_types = [dataFeeder.CPU_UTIL]
# metrics_windows = {
#     dataFeeder.CPU_UTIL : 3,
#     dataFeeder.DISK_IO_TIME,
#     dataFeeder.DISK_SPACE,
#
# }
dat = pd.read_csv('sample_610_10min.csv', index_col=0, parse_dates=True)
print("Getting data")
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    data=dat, metrics=['cpu_rate'], n_sliding_window=n_sliding_window)
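# Univariate setup: a 2-step window over cpu_rate presumably yields two
# inputs and a single output per sample.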
# Number of hiddens node (one hidden layer)

score_list = {}
for n_hidden in np.arange(10, 30, step=1):
    # n_hidden = 80
Example #8
from sklearn.grid_search import GridSearchCV
from estimators.GAEstimator import GAEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *
from io_utils.NumLoad import *

param_dicts = {
    "cross_rate": [0.65, 0.7],
    "pop_size": [60],
    "mutation_rate": np.arange(0.01, 0.05, step=0.01),
    'gen_size': [100]
}
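# GA search grid: crossover rate, population size, mutation rate, and
# (presumably) the number of generations.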
n_windows = 2
n_hidden = 150
metric_types = ["cpu_util", "disk_write_rate", "disk_read_rate", "network_"]
dataFeeder = GFeeder(skip_lists=3)
#X_train,y_train,X_test,y_test = dataFeeder.split_train_and_test(n_sliding_window=n_windows)
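# Pre-windowed training arrays are loaded from .npz rather than re-running
# the split_train_and_test call commented out above.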
X_train, y_train, X_test, y_test = load_training_from_npz(
    "data/gdata/data_training.npz")
neural_shape = [
    n_windows * len(dataFeeder.metric_type), n_hidden,
    len(dataFeeder.metric_type)
]
estimator = GAEstimator()
fit_param = {'neural_shape': neural_shape}
# estimator.fit(X,y,**fit_param)
gs = GridSearchCV(estimator,
                  param_grid=param_dicts,
                  n_jobs=-1,
                  fit_params=fit_param,
                  scoring='mean_squared_error')
Example #9
from sklearn.grid_search import GridSearchCV

from estimators.ACOEstimator import ACOEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *
from io_utils.NumLoad import *

param_dicts = {
    "Q": np.arange(0.01, 0.05, step=0.01),
    "epsilon": np.arange(0.1, 0.6, step=0.05),
    "number_of_solutions": np.arange(30, 200)
}
n_windows = 3
n_hidden = 15

dataFeeder = GFeeder()
X_train, y_train, X_test, y_test = load_training_from_npz(
    "fuzzy_train_direct.npz")
neural_shape = [len(X_train[0]), n_hidden, 1]
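# Single-output network: the input width is read off the loaded features,
# which the archive name suggests were fuzzy-preprocessed beforehand.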
estimator = ACOEstimator()
archive_solution = construct_solution(estimator.number_of_solutions,
                                      neural_shape)
fit_param = {'neural_shape': neural_shape, "archive": archive_solution}
# estimator.fit(X,y,**fit_param)
gs = GridSearchCV(estimator,
                  param_grid=param_dicts,
                  n_jobs=-1,
                  fit_params=fit_param,
                  scoring='mean_squared_error')
gs.fit(X_train, y_train)
print(gs.best_estimator_)