Example #1
    def fit(self, X, y):
        self._check(X, y)
        if dim(y) == 1:
            raw_X = X
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])

            # flat coefficient vector; beta[0] is the intercept slot
            beta = zeros(shape(X)[1])
            X_T = matrix_transpose(X)

            if self.fit_intercept:
                beta[0] = sum(minus(reshape(y, -1), dot(
                    raw_X, beta[1:]))) / (shape(X)[0])

            for _ in range(self.max_iter):
                start = 1 if self.fit_intercept else 0
                for j in range(start, len(beta)):
                    # zero out coordinate j to form the partial residual
                    tmp_beta = [x for x in beta]
                    tmp_beta[j] = 0.0

                    r_j = minus(reshape(y, -1), dot(X, tmp_beta))
                    arg1 = dot(X_T[j], r_j)
                    arg2 = self.alpha * shape(X)[0]  # threshold level

                    if sum(square(X_T[j])) != 0:
                        beta[j] = self._soft_thresholding_operator(
                            arg1, arg2) / sum(square(X_T[j]))
                    else:
                        beta[j] = 0

                    if self.fit_intercept:
                        beta[0] = sum(
                            minus(reshape(y, -1), dot(
                                raw_X, beta[1:]))) / (shape(X)[0])

            if self.fit_intercept:
                self.intercept_ = beta[0]
                self.coef_ = beta[1:]
            else:
                self.coef_ = beta
            self.beta = beta
            return self
        elif dim(y) == 2:
            # multi-target case: fit one coefficient vector per column of y
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])
            y_t = matrix_transpose(y)
            betas = []
            for i in range(shape(y)[1]):
                betas.append(self._fit(X, y_t[i]))
            self.betas = matrix_transpose(betas)
            return self
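For reference, a minimal NumPy sketch of the same coordinate-descent update (NumPy is an assumption for illustration only; the repo uses its own linalg helpers). Each step forms the partial residual with coordinate j removed and applies the soft-thresholding operator S(z, g) = sign(z) * max(|z| - g, 0):

import numpy as np

def soft_threshold(z, gamma):
    # S(z, gamma) = sign(z) * max(|z| - gamma, 0)
    return np.sign(z) * max(abs(z) - gamma, 0.0)

def lasso_cd(X, y, alpha, max_iter=100):
    n, p = X.shape
    beta = np.zeros(p)
    for _ in range(max_iter):
        for j in range(p):
            # partial residual with coordinate j excluded
            r_j = y - X @ beta + X[:, j] * beta[j]
            z = X[:, j] @ r_j
            denom = X[:, j] @ X[:, j]
            beta[j] = soft_threshold(z, alpha * n) / denom if denom else 0.0
    return beta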
Example #2
    def fit(self, X, y, weights=None):
        X, y = self._check(X, y)

        if self.fit_intercept:
            m, n = shape(X)
            bias = ones(m, 1)
            X = hstack([bias, X])

        from linalg.matrix import diag
        eye = identity_matrix(shape(X)[1])
        if not self.penalty_bias:
            eye[0][0] = 0  # leave the bias column unpenalized

        # row weights: scale each sample's row of X by its weight
        # (note that y is left unweighted here)
        if weights is not None:
            assert len(weights) == shape(X)[0]
            X = matrix_matmul(diag(weights), X)

        X_T = matrix_transpose(X)

        # closed-form ridge solution: W = (X^T X + alpha * n * I)^{-1} X^T y
        self.W = matrix_matmul(
            matrix_matmul(
                matrix_inverse(
                    plus(matrix_matmul(X_T, X),
                         multiply(eye,
                                  self.alpha * shape(X)[0]))),
                X_T),
            y)
        self.importance_ = sum(self.W, axis=1)
        if self.fit_intercept:
            self.importance_ = self.importance_[1:]
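A minimal NumPy cross-check of the same closed form (NumPy is an assumption here, not a dependency of this code):

import numpy as np

def ridge_closed_form(X, y, alpha, fit_intercept=True, penalty_bias=False):
    n = X.shape[0]
    if fit_intercept:
        X = np.hstack([np.ones((n, 1)), X])
    eye = np.eye(X.shape[1])
    if fit_intercept and not penalty_bias:
        eye[0, 0] = 0.0  # do not shrink the intercept
    # W = (X^T X + alpha * n * I)^{-1} X^T y
    return np.linalg.solve(X.T @ X + alpha * n * eye, X.T @ y)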
Example #3
    def predict(self, X):
        assert self.beta is not None or self.betas is not None
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        if self.beta is not None:
            # single-target model
            return dot(X, self.beta)
        else:
            # multi-target model
            return matrix_matmul(X, self.betas)
Example #4
    def predict(self, X):
        assert self.W is not None
        if self.fit_intercept:
            m, n = shape(X)
            bias = ones(m, 1)
            X = hstack([bias, X])
        result = matrix_matmul(X, self.W)
        if self.dim_Y == 1:
            # flatten single-target predictions to a plain list
            result = [x[0] for x in result]
        return result
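Typical usage of these estimators, following the call pattern in Example #9 (the toy data is made up for illustration; y is passed as a column, matching the fancy(..., (i, i + 1)) slices used elsewhere):

X = [[1.0], [2.0], [3.0], [4.0]]
y = [[2.1], [3.9], [6.2], [8.1]]

clf = Ridge(alpha=1, fit_intercept=True)
clf.fit(X, y)
print(clf.predict([[5.0], [6.0]]))  # flat list, since dim_Y == 1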
Example #5
    def fit(self, X, y):
        X, y = self._check(X, y)
        if self.fit_intercept:
            m, n = shape(X)
            bias = ones(m, 1)
            X = hstack([bias, X])

        X_T = matrix_transpose(X)
        # ordinary least squares via the normal equations:
        # W = (X^T X)^{-1} X^T y
        self.W = matrix_matmul(
            matrix_matmul(matrix_inverse(matrix_matmul(X_T, X)), X_T), y)
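The same fit can be cross-checked against NumPy's least-squares solver (a sketch; the data is illustrative):

import numpy as np

X = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]])
y = np.array([3.0, 3.0, 7.0, 7.0])

Xb = np.hstack([np.ones((X.shape[0], 1)), X])  # bias column, as in fit()
W, *_ = np.linalg.lstsq(Xb, y, rcond=None)
# should match matrix_inverse(X^T X) X^T y when X^T X is invertible
print(W)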
Example #6
    def fit(self, X, y):
        X, y = self._check(X, y)

        if self.fit_intercept:
            m, n = shape(X)
            bias = ones(m, 1)
            X = hstack([bias, X])

        from linalg.matrix import diag
        eye = identity_matrix(shape(X)[1])
        if self.penalty_loss:
            # per-feature penalty weights on the diagonal instead of plain I
            eye = diag(self.penalty_loss)
        X_T = matrix_transpose(X)

        # generalized ridge: W = (X^T X + alpha * n * D)^{-1} X^T y
        self.W = matrix_matmul(
            matrix_matmul(
                matrix_inverse(
                    plus(matrix_matmul(X_T, X),
                         multiply(eye,
                                  self.alpha * shape(X)[0]))), X_T), y)
        self.importance_ = sum(self.W, axis=1)
        if self.fit_intercept:
            self.importance_ = self.importance_[1:]
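With a diagonal penalty matrix D in place of I, the closed form becomes W = (X^T X + alpha * n * D)^{-1} X^T y. A NumPy sketch (the penalty weights are illustrative):

import numpy as np

def generalized_ridge(X, y, alpha, penalty_diag):
    n = X.shape[0]
    D = np.diag(penalty_diag)  # per-feature shrinkage strengths
    return np.linalg.solve(X.T @ X + alpha * n * D, X.T @ y)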
Example #7
     ],
     [
         0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 0.0, 7.0, 7.0, 0.0, 7.0, 5.0, 0.0, 1.0,
         5.0
     ],
     [
         0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 4.0, 0.0, 0.0, 3.0, 1.0, 0.0, 8.0,
         0.0
     ],
     [
         0.0, 2.0, 3.0, 0.0, 4.0, 8.0, 1.0, 14.0, 1.0, 0.0, 14.0, 0.0, 0.0,
         4.0, 0.0
     ],
     [
         2.0, 4.0, 1.0, 1.0, 4.0, 0.0, 4.0, 12.0, 6.0, 0.0, 3.0, 9.0, 1.0, 4.0,
         1.0
     ],
     [
         1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 11.0, 28.0, 8.0, 4.0, 4.0, 1.0, 0.0,
         0.0, 0.0
     ]]

# print(shift(A,1))
print(shape(vstack([A])))
print(shape(vstack([A, A, A])))
print(vstack([A, A, A]))

print(shape(hstack([A])))
print(shape(hstack([A, shift(A, 1), A])))
print(hstack([A, shift(A, 1), A]))
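For comparison, the NumPy equivalents: vstack stacks along rows, hstack along columns, and shift roughly corresponds to np.roll along axis 0 (the exact shift semantics of this repo's helper are an assumption here):

import numpy as np

A = np.zeros((6, 15))  # illustrative shape

print(np.vstack([A]).shape)        # (6, 15)
print(np.vstack([A, A, A]).shape)  # (18, 15)
print(np.hstack([A]).shape)        # (6, 15)
print(np.hstack([A, np.roll(A, 1, axis=0), A]).shape)  # (6, 45)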
Example #8
def features_building(ecs_logs, flavors_config, flavors_unique,
                      training_start_time, training_end_time,
                      predict_start_time, predict_end_time):
    mapping_index = get_flavors_unique_mapping(flavors_unique)
    predict_days = (predict_end_time - predict_start_time).days

    sample = resampling(ecs_logs, flavors_unique, training_start_time,
                        predict_start_time, frequency=predict_days,
                        strike=1, skip=0)

    def outlier_handling(sample, method='mean', max_sigma=3):
        assert method in ('mean', 'zero', 'dynamic')
        sample = matrix_copy(sample)
        std_ = stdev(sample)
        # per-column means, matching the column-wise std (cf. Example #9)
        mean_ = mean(sample, axis=0)
        for i in range(shape(sample)[0]):
            for j in range(shape(sample)[1]):
                if sample[i][j] - mean_[j] > max_sigma * std_[j]:
                    if method == 'mean':
                        sample[i][j] = mean_[j]
                    elif method == 'zero':
                        sample[i][j] = 0
                    elif method == 'dynamic':
                        sample[i][j] = (sample[i][j] + mean_[j]) / 2.0

        return sample

    sample = outlier_handling(sample, method='mean', max_sigma=3)
    # sample = exponential_smoothing(sample,alpha=0.2)

    Ys = sample[1:]  # targets: the next period's counts, one step ahead

    def flavor_clustering(sample, k=3, variance_threshold=None):
        corrcoef_sample = corrcoef(sample)
        clustering_paths = []
        for i in range(shape(sample)[1]):
            col = corrcoef_sample[i]
            col_index_sorted = argsort(col)[::-1]
            if variance_threshold is not None:
                # keep every flavor whose correlation clears the threshold
                # (k is ignored in this branch); position 0 is self
                col_index_sorted = col_index_sorted[1:]
                index = [j for j in col_index_sorted
                         if col[j] > variance_threshold]
            else:
                # top-k most correlated flavors, excluding self
                index = col_index_sorted[1:k + 1]
            clustering_paths.append(index)
        return clustering_paths, corrcoef_sample


    # adjustable #1: correlation threshold for clustering
    # (scored 76.234 in the author's tuning notes)
    variance_threshold = 0.6

    clustering_paths, coef_sample = flavor_clustering(
        sample, variance_threshold=variance_threshold)

    def get_feature_grid(sample, i, fill_na='mean', max_na_rate=1,
                         col_count=None, with_test=True):
        assert fill_na in ('mean', 'zero')
        col = fancy(sample, None, i)
        # build a lag grid: row j holds the history of column i up to
        # step j, left-padded with None where no history exists yet
        R = []
        for j in range(len(col)):
            left = [None for _ in range(len(col) - j)]
            right = col[:j]
            r = []
            r.extend(left)
            r.extend(right)
            R.append(r)

        def _mean_with_none(A):
            # mean that treats None entries as zeros
            # (the denominator counts them as well)
            if len(A) == 0:
                return 0
            count = 0
            for k in range(len(A)):
                if A[k] is not None:
                    count += A[k]
            return count / float(len(A))

        means = []
        for j in range(shape(R)[1]):
            means.append(_mean_with_none(fancy(R, None, j)))

        # drop the leftmost columns that are mostly padding
        width = int((1 - max_na_rate) * shape(R)[1])
        R = fancy(R, None, (width,))
        for i_row in range(shape(R)[0]):
            for j in range(shape(R)[1]):
                if R[i_row][j] is None:
                    if fill_na == 'mean':
                        R[i_row][j] = means[j]
                    elif fill_na == 'zero':
                        R[i_row][j] = 0

        # keep only the most recent col_count lags; with_test keeps the
        # final row (the feature row for the prediction period)
        if with_test:
            if col_count is not None:
                return fancy(R, None, (-col_count,))
            return R
        if col_count is not None:
            return fancy(R, (0, -1), (-col_count,))
        return R[:-1]



    X_trainS, Y_trainS, X_test_S = [], [], []

    # adjustable #2: number of lag features per sample
    col_count = 5

    for f in flavors_unique:
        X = get_feature_grid(sample, mapping_index[f], col_count=col_count,
                             fill_na='mean', max_na_rate=1, with_test=True)
        X_test = X[-1:]
        X = X[:-1]
        y = fancy(Ys, None, (mapping_index[f], mapping_index[f] + 1))

        clustering = True
        # 1. data clustering
        if clustering:
            # duplicate this flavor's own rows to upweight them relative
            # to the correlated flavors appended below
            X.extend(X)
            y.extend(y)

            for cluster_index in clustering_paths[mapping_index[f]]:
                # lag features and targets of the correlated flavor,
                # both weighted by its correlation with flavor f
                X_cluster = get_feature_grid(sample, cluster_index,
                                             col_count=col_count,
                                             fill_na='mean', max_na_rate=1,
                                             with_test=False)
                y_cluster = fancy(Ys, None,
                                  (cluster_index, cluster_index + 1))
                w = coef_sample[mapping_index[f]][cluster_index]

                # important: scale the augmented pairs by the correlation
                X_cluster = apply(X_cluster, lambda x: x * w)
                y_cluster = apply(y_cluster, lambda x: x * w)

                X.extend(X_cluster)
                y.extend(y_cluster)

        # do not delete: keep the test row attached so the transforms
        # below are applied to it as well
        X.extend(X_test)

        # --------------------------------------------------------- #
        # feature expansion: append a log1p-transformed copy of the lags
        add_list = [X]
        add_list.extend([apply(X, lambda x: math.log1p(x))])  # important
        X = hstack(add_list)
        # --------------------------------------------------------- #

        def multi_exponential_smoothing(A, list_of_alpha):
            # smooth repeatedly, once per alpha in the list
            R = A
            for a in list_of_alpha:
                R = exponential_smoothing(R, alpha=a)
            return R

        # adjustable #3: smoothing depth; #4: smoothing base weights
        depth = 3
        base = [0.6, 0.7, 0.8]

        # one alpha sequence per base value, each repeated `depth` times
        alphas = [[b for _ in range(depth)] for b in base]

        # augment with smoothed copies of the training rows (X[:-1]
        # excludes the test row) plus the unsmoothed originals
        X_data_list = [multi_exponential_smoothing(X[:-1], a) for a in alphas]
        Y_data_list = [multi_exponential_smoothing(y, a) for a in alphas]

        X_data_list.extend([X])
        Y_data_list.extend([y])
        X = vstack(X_data_list)
        y = vstack(Y_data_list)


        y = flatten(y)
        X = normalize(X, y=y, norm='l1')

        # X still carries the test row; split it off for prediction
        assert shape(X)[0] == shape(y)[0] + 1
        X_trainS.append(X[:-1])
        X_test_S.append(X[-1:])
        Y_trainS.append(y)

    return X_trainS, Y_trainS, X_test_S
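The core of get_feature_grid is a left-padded lag matrix; a compact NumPy sketch of the same idea (NumPy here is for illustration only):

import numpy as np

def lag_grid(col, n_lags):
    # row t holds the last n_lags values of `col` before time t,
    # zero-padded where no history exists yet
    col = np.asarray(col, dtype=float)
    padded = np.concatenate([np.zeros(n_lags), col])
    return np.stack([padded[t:t + n_lags] for t in range(len(col))])

print(lag_grid([1, 2, 3, 4], n_lags=2))
# [[0. 0.]
#  [0. 1.]
#  [1. 2.]
#  [2. 3.]]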
Example #9
def predict_flavors(ecs_logs, flavors_config, flavors_unique, training_start,
                    training_end, predict_start, predict_end):
    # round the horizon up to a whole day when 12+ hours remain
    predict_days = (predict_end - predict_start).days
    hours = (predict_end - predict_start).seconds / float(3600)
    if hours >= 12:
        predict_days += 1

    skip_days = (predict_start - training_end).days

    sample = resampling(ecs_logs,
                        flavors_unique,
                        training_start,
                        training_end,
                        frequency=1,
                        strike=1,
                        skip=0)

    def outlier_handling(sample, method='mean', max_sigma=3):
        assert method in ('mean', 'dynamic')
        std_ = stdev(sample)
        mean_ = mean(sample, axis=0)
        for i in range(shape(sample)[0]):
            for j in range(shape(sample)[1]):
                if sample[i][j] - mean_[j] > max_sigma * std_[j]:
                    if method == 'mean':
                        sample[i][j] = mean_[j]
                    elif method == 'dynamic':
                        if i < len(sample) / 2.0:
                            sample[i][j] = (mean_[j] + sample[i][j]) / 2.0
        return sample

    # sample = outlier_handling(sample,method='dynamic',max_sigma=3)
    # sample = outlier_handling(sample,method='mean',max_sigma=3.5)

    # from preprocessing import exponential_smoothing
    # sample = exponential_smoothing(exponential_smoothing(sample,alpha=0.2),alpha=0.2)

    # narrow the gap by one day (training_end's own day appears to be
    # included in the resampled data already)
    skip_days -= 1
    prediction = []
    for i in range(shape(sample)[1]):

        # fit a ridge regression of daily counts on the time index, then
        # extrapolate over the gap plus the prediction window
        clf = Ridge(alpha=1, fit_intercept=True)

        X = reshape(list(range(len(sample))), (-1, 1))
        y = fancy(sample, None, (i, i + 1))

        X_test = reshape(
            list(range(len(sample),
                       len(sample) + skip_days + predict_days)), (-1, 1))

        # single-element hstacks: placeholders for extra feature columns
        X_list = [X]
        X = hstack(X_list)

        X_test_list = [X_test]
        X_test = hstack(X_test_list)

        clf.fit(X, y)
        p = clf.predict(X_test)

        # total demand over the extrapolated horizon (gap days included)
        prediction.append(sum(flatten(p)))

    # clip negatives to zero and round to integer request counts
    prediction = [int(round(p)) if p > 0 else 0 for p in prediction]

    return prediction
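A quick check of the day-count rounding at the top of predict_flavors (the dates are made up):

from datetime import datetime

predict_start = datetime(2015, 2, 20, 0, 0)
predict_end = datetime(2015, 2, 27, 14, 0)  # 7 days and 14 hours later

predict_days = (predict_end - predict_start).days            # 7
hours = (predict_end - predict_start).seconds / float(3600)  # 14.0
if hours >= 12:
    predict_days += 1  # rounds up to 8 whole days
print(predict_days)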