def get_params():
    """Fan out put_queue over sliding-window sizes 4..20 in parallel.

    Returns:
        list: the per-window results collected by Parallel, in window order.
    """
    # One feeder instance shared by every delayed task.
    # NOTE(review): assumes Parallel/delayed come from joblib and put_queue is
    # a sibling module function — confirm imports at the top of the file.
    dataFeeder = GFeeder()
    # n_jobs=-1: use every available core.
    out = Parallel(n_jobs=-1)(
        delayed(put_queue)(n_input, dataFeeder) for n_input in range(4, 21)
    )
    return out
# Experiment: NeuralFlow regressor on CPU-rate series with sliding window 4.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils.SlidingWindowUtil import SlidingWindow
from io_utils.GFeeder import GFeeder
from estimators.NeuralFlow import NeuralFlowRegressor
from utils.GraphUtil import *
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
dat = pd.read_csv('../sample_610_10min.csv', index_col=0, parse_dates=True)
training_size = 3000
test_size = 600
# NOTE(review): gFeeder is never used below — kept in case GFeeder() has
# side effects; confirm and delete if not.
gFeeder = GFeeder()

# Scale cpu_rate into [0, 1] before windowing.
dat.cpu_rate = np.array(scaler.fit_transform(dat.cpu_rate))
# Each sample is a window of 4 consecutive scaled cpu_rate values.
X_dat = np.array(list(SlidingWindow(dat.cpu_rate, sliding_number=4)))

X_train = X_dat[:training_size]
y_train = np.array(dat.cpu_rate[:training_size].tolist()).reshape(-1, 1)
# Test split starts one step past the training range.
X_test = X_dat[training_size + 1:training_size + 1 + test_size]
y_test = np.array(
    dat.cpu_rate[training_size + 1:training_size + test_size + 1].tolist()
).reshape(-1, 1)

# Single hidden layer of 55 nodes.
nn = NeuralFlowRegressor(learning_rate=1E-03, hidden_nodes=np.array([55]))
nn.fit(X_train, y_train)
y_pred = nn.predict(X_test)
plot_figure(y_pred=y_pred, y_true=y_test, title="Neural Flow sliding window 4")
# Experiment GABPNN: sweep hidden-layer sizes for the GA+BP neural network
# on Google-cluster CPU utilisation data.
from estimators.GAEstimator import GAEstimator
from estimators.NeuralFlow import NeuralFlowRegressor
from io_utils.GFeeder import GFeeder
from utils.GraphUtil import *
from utils.initializer import *

# length of sliding windows for input
n_sliding_window = 2

# Getting Google cluster data (normalised, first list skipped).
dataFeeder = GFeeder(skip_lists=1, normalize_space=True)
metric_type = [dataFeeder.CPU_UTIL]
# metric_type = [dataFeeder.MEM_USAGE]
# print metrics_types
# metrics_windows = {
#     dataFeeder.CPU_UTIL : 3,
#     dataFeeder.DISK_IO_TIME,
#     dataFeeder.DISK_SPACE,
# }

print("Getting data")
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    metrics=metric_type, n_sliding_window=n_sliding_window)

# Number of hidden nodes (one hidden layer): repeat the sweep 10 times
# to average out stochastic training runs.
score_print = []
for i in range(10):
    score_list = {}
    for n_hidden in np.arange(120, 300, step=10):
        # n_hidden = 200
        # Define neural shape
        # NOTE(review): the source was truncated here — the per-n_hidden
        # training/scoring body is missing and must be restored from the
        # original file.
        pass
# Evaluate a previously trained skflow model on a fresh train/test split.
from __init__ import *
from sklearn.metrics import mean_squared_error
from io_utils.GFeeder import GFeeder
from math import fabs
import skflow
from utils.GraphUtil import *
from io_utils.NumLoad import *
import matplotlib.pyplot as plt

# Restore the persisted TensorFlow estimator (path encodes its MSE).
model = skflow.TensorFlowEstimator.restore(
    "params/model_full_metric_0.00409917244599")
n_sliding_window = 2
skip_lists = 3
dataFeeder = GFeeder(skip_lists, normalize_space=True)
# dataFeederNormalize = GFeeder()

# All four monitored metrics are fed to the model together.
metric_type = [
    dataFeeder.CPU_UTIL, dataFeeder.DISK_IO_TIME, dataFeeder.DISK_SPACE,
    dataFeeder.MEM_USAGE
]
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    metrics=metric_type, n_sliding_window=n_sliding_window)
# X_trainn,y_trainn,X_testn,y_testn = dataFeederNormalize.split_train_and_test(metrics=metrics_types,n_sliding_window=n_sliding_window)
# X_train,y_train,X_test,y_test = load_training_from_npz("data/gdata/data_training_origin.npz")
# ax = plt.subplot()
# X_trainn,y_trainn,X_testn,y_testn = load_training_from_npz("data/gdata/data_training.npz")

y_pred = model.predict(X_test)
# plot_metric_figure(y_pred=y_pred,y_test=y_test, metric_type=metric_type,title="GA Prediction")
for k, metric in enumerate(metric_type):
    # NOTE(review): the source was truncated here — the per-metric body
    # (presumably per-metric scoring/plotting) is missing and must be
    # restored from the original file.
    pass
# coding: utf-8
# Notebook export: autocorrelation of the CPU-utilisation workload.

# In[1]:
from scipy import signal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from io_utils.GFeeder import GFeeder

# In[9]:
dataFeeder = GFeeder(split_size=5)
raw_data = dataFeeder.read()
n_row = raw_data.shape[0]
workload = raw_data[dataFeeder.CPU_UTIL]

# In[10]:
# Compute the autocorrelation.
# Center the series (subtract the mean) before correlating.
n = len(workload)
variance = workload.var()
workload2 = workload - workload.mean()
# Full cross-correlation of the centered series with itself; the last n
# entries are the non-negative lags.
r = np.correlate(workload2, workload2, mode='full')[-n:]
# assert np.allclose(r, np.array([(workload[:n-k]*workload[-(n-k):]).sum() for k in range(n)]))
# Biased normalisation (divide by n*variance at every lag).
result = r / (n * variance)
# result = r/(variance*(np.arange(n, 0, -1)))
# x-axis in days: 144 samples/day — presumably 10-minute sampling; confirm.
plt.plot(np.arange(n) / 144.0, result)
# Grid search over ACO hyper-parameters for the neural-network estimator.
from estimators.ACOEstimator import ACOEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *

# ACO hyper-parameter search space.
param_dicts = {
    "Q": np.arange(0.01, 0.1, step=0.01),
    "epsilon": np.arange(0.1, 1.0, step=0.05),
    "number_of_solutions": np.arange(30, 200)
}
n_windows = 4
n_hidden = 10
# range_training = (-1,28919)
# range_test = (28919,-1)
# metric_types = ["cpu_util","disk_write_rate","disk_read_rate","network_"]
dataFeeder = GFeeder()
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    n_sliding_window=n_windows)

# [input, hidden, output] layer sizes: one window per metric in, one
# prediction per metric out.
neural_shape = [
    n_windows * len(dataFeeder.metric_type), n_hidden,
    len(dataFeeder.metric_type)
]
estimator = ACOEstimator()
archive_solution = construct_solution(estimator.number_of_solutions,
                                      neural_shape)
fit_param = {'neural_shape': neural_shape, "archive": archive_solution}
# estimator.fit(X,y,**fit_param)
# NOTE(review): the source was truncated inside this call; completed to match
# the identical GridSearchCV usage elsewhere in this file — confirm against
# the original. Also verify GridSearchCV is imported (siblings use
# sklearn.grid_search; here it presumably arrives via a star import).
gs = GridSearchCV(estimator, param_grid=param_dicts, n_jobs=-1,
                  fit_params=fit_param, scoring='mean_squared_error')
# Experiment GABPNN: sweep small hidden-layer sizes on the cpu_rate series.
import pandas as pd
import numpy as np
from estimators.GAEstimator import GAEstimator
from estimators.NeuralFlow import NeuralFlowRegressor
from estimators.OptimizerNNEstimator import OptimizerNNEstimator
from io_utils.GFeeder import GFeeder
# from utils.GraphUtil import *
# from utils.initializer import *

# length of sliding windows for input
n_sliding_window = 2

# Getting Google cluster data
dataFeeder = GFeeder(skip_lists=1)
metrics_types = [dataFeeder.CPU_UTIL]
# metrics_windows = {
#     dataFeeder.CPU_UTIL : 3,
#     dataFeeder.DISK_IO_TIME,
#     dataFeeder.DISK_SPACE,
# }
dat = pd.read_csv('sample_610_10min.csv', index_col=0, parse_dates=True)

print("Getting data")
X_train, y_train, X_test, y_test = dataFeeder.split_train_and_test(
    data=dat, metrics=['cpu_rate'], n_sliding_window=n_sliding_window)

# Number of hidden nodes (one hidden layer).
score_list = {}
for n_hidden in np.arange(10, 30, step=1):
    # n_hidden = 80
    # NOTE(review): the source was truncated here — the per-n_hidden
    # training/scoring body is missing and must be restored from the
    # original file.
    pass
# Grid search over GA hyper-parameters on a pre-saved training set.
from sklearn.grid_search import GridSearchCV
from estimators.GAEstimator import GAEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *
from io_utils.NumLoad import *

# GA hyper-parameter search space.
param_dicts = {
    "cross_rate": [0.65, 0.7],
    "pop_size": [60],
    "mutation_rate": np.arange(0.01, 0.05, step=0.01),
    'gen_size': [100]
}
n_windows = 2
n_hidden = 150
metric_types = ["cpu_util", "disk_write_rate", "disk_read_rate", "network_"]
# Feeder is only used below for metric_type when sizing the network.
dataFeeder = GFeeder(skip_lists=3)
# X_train,y_train,X_test,y_test = dataFeeder.split_train_and_test(n_sliding_window=n_windows)
X_train, y_train, X_test, y_test = load_training_from_npz(
    "data/gdata/data_training.npz")

# [input, hidden, output] layer sizes.
neural_shape = [
    n_windows * len(dataFeeder.metric_type), n_hidden,
    len(dataFeeder.metric_type)
]
estimator = GAEstimator()
fit_param = {'neural_shape': neural_shape}
# estimator.fit(X,y,**fit_param)
# NOTE(review): gs.fit(...) is never invoked here (unlike the ACO sibling
# script) — possibly truncated source; confirm against the original.
gs = GridSearchCV(estimator, param_grid=param_dicts, n_jobs=-1,
                  fit_params=fit_param, scoring='mean_squared_error')
# Grid search over ACO hyper-parameters on the fuzzy pre-processed dataset.
from sklearn.grid_search import GridSearchCV
from estimators.ACOEstimator import ACOEstimator
from io_utils.GFeeder import GFeeder
from utils.initializer import *
from io_utils.NumLoad import *

# ACO hyper-parameter search space.
param_dicts = {
    "Q": np.arange(0.01, 0.05, step=0.01),
    "epsilon": np.arange(0.1, 0.6, step=0.05),
    "number_of_solutions": np.arange(30, 200)
}
n_windows = 3
n_hidden = 15
# NOTE(review): dataFeeder is constructed but unused (data comes from the
# .npz below) — kept in case GFeeder() has side effects; confirm and remove.
dataFeeder = GFeeder()
X_train, y_train, X_test, y_test = load_training_from_npz(
    "fuzzy_train_direct.npz")

# [input, hidden, output]: input width taken from the loaded samples,
# single output unit.
neural_shape = [len(X_train[0]), n_hidden, 1]
estimator = ACOEstimator()
archive_solution = construct_solution(estimator.number_of_solutions,
                                      neural_shape)
fit_param = {'neural_shape': neural_shape, "archive": archive_solution}
# estimator.fit(X,y,**fit_param)
gs = GridSearchCV(estimator, param_grid=param_dicts, n_jobs=-1,
                  fit_params=fit_param, scoring='mean_squared_error')
gs.fit(X_train, y_train)
print(gs.best_estimator_)