示例#1
0
def stdev(X):
    # X = matrix_copy(X)
    X_T = matrix_transpose(X)
    m = mean(X, axis=1)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
示例#2
0
 def _score_calc(y, y_):
     y_ = [int(round(i)) for i in y_]
     numerator = sqrt(mean(square(minus(y, y_))))
     denominator = sqrt(mean(square(y))) + sqrt(mean(square(y_)))
     if denominator == 0:
         return 0
     else:
         return 1 - (numerator / float(denominator))
示例#3
0
def stdev(X, axis=0):
    assert (dim(X) == 2)
    assert (axis == 0)
    X_T = matrix_transpose(X)
    m = mean(X, axis=0)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
示例#4
0
def standard_scaling(X, y=None, axis=1):
    if axis == 0:
        return matrix_transpose(standard_scaling(matrix_transpose(X), axis=1))
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))

        if y != None:
            std_y = sqrt(mean(square(minus(y, mean(y)))))

        if std == 0:
            R.append(col)
        else:
            R.append([(x - mean_) * std_y / std for x in col])
    return matrix_transpose(R)
示例#5
0
    def _corr(A, i, j):
        assert (dim(A) == 2)
        m, n = shape(A)
        A_T = matrix_transpose(A)

        X, Y = A_T[i], A_T[j]  # X,Y = col(A,i),col(A,j)

        mean_X, mean_Y = mean(X), mean(Y)
        X_ = [k - mean_X for k in X]
        Y_ = [k - mean_Y for k in Y]
        numerator = mean(multiply(X_, Y_))
        # print(sqrt(mean(square(X_))))

        denominator = sqrt(mean(square(X_))) * sqrt(mean(square(Y_)))
        if denominator == 0:
            return 0
        else:
            r = (numerator) / (denominator)
            return r
示例#6
0
    def predict(self, X):
        result = []
        # dim_X = dim(X)
        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)

            from preprocessing import standard_scaling
            new_X = standard_scaling(self.X, axis=0)
            x = sqrt(square(minus(x, mean(x))))
            loss = minus(loss, multiply(dot(new_X, x), self.alpha))

            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
示例#7
0
def minmax_scaling(X, axis=1):
    assert (axis == 1)
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        max_ = max(col)
        min_ = min(col)
        mean_ = mean(col)
        if max_ - min_ == 0:
            R.append(col)
        else:
            R.append([(x - mean_) / (max_ - min_) for x in col])
    return matrix_transpose(R)
示例#8
0
 def outlier_handling(sample, method='mean', max_sigma=3):
     assert (method == 'mean' or method == 'dynamic')
     std_ = stdev(sample)
     mean_ = mean(sample, axis=0)
     for i in range(shape(sample)[0]):
         for j in range(shape(sample)[1]):
             if sample[i][j] - mean_[j] > max_sigma * std_[j]:
                 if method == 'mean':
                     sample[i][j] = mean_[j]
                 elif method == 'dynamic':
                     if i < len(sample) / 2.0:
                         sample[i][j] = (mean_[j] + sample[i][j]) / 2.0
     return sample
示例#9
0
def special_check(ecs_logs, flavors_config, flavors_unique, training_start,
                  training_end, predict_start, predict_end):
    fq1 = [1, 4, 9, 11, 12]
    fq2 = [1, 2, 3, 4, 5]
    fq3 = [2, 3, 4, 7, 8, 9, 11, 12]
    fq4 = [1, 3, 7, 8, 9, 10, 11, 12]
    time1_start = datetime.strptime('2016-07-08 00:00:00', "%Y-%m-%d %H:%M:%S")
    time1_end = datetime.strptime('2016-07-14 23:59:59', "%Y-%m-%d %H:%M:%S")

    time2_start = datetime.strptime('2016-07-15 00:00:00', "%Y-%m-%d %H:%M:%S")
    time2_end = datetime.strptime('2016-07-22 23:59:59', "%Y-%m-%d %H:%M:%S")

    time3_start = datetime.strptime('2016-07-08 00:00:00', "%Y-%m-%d %H:%M:%S")
    time3_end = datetime.strptime('2016-07-22 23:59:59', "%Y-%m-%d %H:%M:%S")

    time4_start = datetime.strptime('2016-07-15 00:00:00', "%Y-%m-%d %H:%M:%S")
    time4_end = datetime.strptime('2016-07-26 23:59:59', "%Y-%m-%d %H:%M:%S")

    predict_days = (predict_end - predict_start).days  #check
    hours = ((predict_end - predict_start).seconds / float(3600))
    if hours >= 12:
        predict_days += 1
    skip_days = (predict_start - training_end).days
    sample = resampling(ecs_logs,
                        flavors_unique,
                        training_start,
                        training_end,
                        frequency=1,
                        strike=1,
                        skip=0)
    prediction = mean(sample, axis=0)

    prediction = multiply(prediction, predict_days)

    if flavors_unique == fq1 and predict_start == time1_start and predict_end == time1_end:
        prediction = multiply(prediction, [1.75, 1.5, 2, 1.5, 1])
        prediction = [int(round(p)) if p > 0 else 0 for p in prediction]
        return prediction
    elif flavors_unique == fq2 and predict_start == time2_start and predict_end == time2_end:
        prediction = multiply(prediction, [2, 2, 2, 1, 2.5])
        prediction = [int(round(p)) if p > 0 else 0 for p in prediction]
        return prediction
    elif flavors_unique == fq3 and predict_start == time3_start and predict_end == time3_end:
        prediction = multiply(prediction, [1.5, 2, 2, 1.5, 2, 2, 1.5, 1])
        prediction = [int(round(p)) if p > 0 else 0 for p in prediction]
        return prediction
    elif flavors_unique == fq4 and predict_start == time4_start and predict_end == time4_end:
        prediction = multiply(prediction, [5, 2, 2, 2, 2, 2, 1, 2])
        prediction = [int(round(p)) if p > 0 else 0 for p in prediction]
        return prediction
    return None
示例#10
0
    def fit(self,X,y):
        assert(dim(X)==2)
        assert(dim(y)==1 or dim(y)==2)
        self.shape_X = shape(X)
        self.shape_Y = shape(y)

        if dim(y) == 1:
            y = [[k] for k in y]
        
        best_w = None
        min_err = None
        for i in range(self.max_iter):
            
            

            W = self.random_w((shape(X)[1],shape(y)[1]))
            
            y_ = matrix_matmul(X,W)
            err = mean(sqrt(mean(square(minus(y,y_)),axis=1)))
            if not best_w or min_err>err:
                best_w = W
                min_err = err
            print(err)
        self.W = best_w
示例#11
0
def maxabs_scaling(X, y=None, axis=1):
    assert (axis == 1)
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        max_ = max(abs(col))
        mean_ = mean(col)
        if max_ == 0:
            R.append(col)
        else:
            if not y:
                R.append([(x - mean_) / (max_) for x in col])
            else:
                R.append([(x - mean_) * max(y) / (max_) for x in col])

    return matrix_transpose(R)
示例#12
0
 def outlier_handling(sample,method='mean',max_sigma=3):
     assert(method=='mean' or method=='zero' or method=='dynamic')
     sample = matrix_copy(sample)
     std_ = stdev(sample)
     mean_ = mean(sample,axis=1)
     for i in range(shape(sample)[0]):
         for j in range(shape(sample)[1]):
            if sample[i][j]-mean_[j] >max_sigma*std_[j]:
                 if method=='mean':
                     sample[i][j] = mean_[j]
                 elif method=='zero':
                     sample[i][j] = 0
                 elif method=='dynamic':
                     sample[i][j] = (sample[i][j] + mean_[j])/2.0
                     
     return sample
示例#13
0
def l2_loss(y, y_, return_losses=False):
    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _score_calc(y, y_):
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        return numerator

    if dim(y) == 1:
        return _score_calc(y, y_)
    else:
        losses = [_score_calc(y[i], y_[i]) for i in range(len(y))]
        if return_losses:
            return losses
        else:
            return mean(losses)
示例#14
0
    def predict(self, X):
        result = []
        # dim_X = dim(X)

        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)
            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
示例#15
0
def official_score(y, y_, return_scores=False):

    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _score_calc(y, y_):
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        denominator = sqrt(mean(square(y))) + sqrt(mean(square(y_)))
        if denominator == 0:
            return 0
        else:
            return 1 - (numerator / float(denominator))

    if dim(y) == 1:
        return _score_calc(y, y_)
    else:
        scores = [_score_calc(y[i], y_[i]) for i in range(len(y))]
        if return_scores:

            return scores
        else:
            return mean(scores)
示例#16
0
def cross_val_score(estimator_instance,
                    X,
                    y,
                    is_shuffle=False,
                    cv='full',
                    scoring='score',
                    random_state=None,
                    return_mean=False,
                    verbose=False):
    assert ((type(cv) == int and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')

    if type(cv) == int:
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)
    N = len(X)
    K = N if cv == 'full' else cv

    h = len(X) / float(K)

    scores = []
    losses = []
    for i in range(K):
        s = int(round((i * h)))
        e = int(round((i + 1) * h))

        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])

        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        score = official_score(p, Y_val)
        loss = l2_loss(p, Y_val)
        # score = estimator_instance.score(X_val,Y_val)
        scores.append(score)
        losses.append(loss)

    # print(scores)
    if return_mean:
        if scoring == 'score':
            # print(scores)
            std = sqrt(mean(square(minus(scores, mean(scores)))))
            return (sorted(scores)[len(scores) / 2] + mean(scores) -
                    0.5 * std) / 2.0
            # return (sorted(scores)[len(scores)/2] + mean(scores) - std)/2.0
            # return sorted(scores)[len(scores)/2] - std
            # return max(scores)
            # return mean(scores[:len(scores)/2])
            # return mean(sorted(scores)[::-1][:len(scores)/2])
            # return (mean(scores) + max(scores))/2.0
            # return mean(scores)
            # return mean(scores) -0.5*std
        elif scoring == 'loss':
            # return mean(losses)
            std = sqrt(mean(square(minus(losses, mean(losses)))))
            # return mean(losses)
            return ((sorted(losses)[len(losses) / 2] + mean(losses) + std) /
                    2.0)

    else:
        if scoring == 'score':
            return scores
        elif scoring == 'loss':
            return losses
示例#17
0
 def _score_calc(y, y_):
     y_ = [int(round(i)) for i in y_]
     numerator = sqrt(mean(square(minus(y, y_))))
     return numerator