Exemplo n.º 1
0
def merge(ecs_logs, flavors_config, flavors_unique, training_start_time,
          training_end_time, predict_start_time, predict_end_time):
    """Fit one Ridge model per flavor and return the predicted VM counts.

    Features are built with ``features_building``; for every flavor index a
    Ridge model is selected with ``early_stoping`` over a descending grid of
    ``alpha`` values, and its prediction on the test features is used.
    The training and validation ``official_score`` are printed as a side
    effect.

    Returns:
        A tuple ``(predict, virtual_machine_sum)`` where ``predict`` maps each
        entry of ``flavors_unique`` to a non-negative integer count and
        ``virtual_machine_sum`` is the sum of those counts (accumulated once
        per entry of ``flavors_unique``).
    """
    mapping_index = get_flavors_unique_mapping(flavors_unique)

    X_trainS_raw, Y_trainS_raw, X_testS = features_building(
        ecs_logs, flavors_config, flavors_unique, training_start_time,
        training_end_time, predict_start_time, predict_end_time)

    # NOTE(review): presumably (0,-1) keeps everything but the last sample for
    # training and (-1,) holds out the last sample for validation -- confirm
    # against the implementation of ``fancy``.
    X_trainS = fancy(X_trainS_raw, None, (0, -1), None)
    Y_trainS = fancy(Y_trainS_raw, None, (0, -1))
    X_valS = fancy(X_trainS_raw, None, (-1,), None)
    Y_valS = fancy(Y_trainS_raw, None, (-1,))

    # adjustable #5: Ridge regression alpha grid, tried from largest to
    # smallest so that early_stoping can stop once validation degrades.
    alphas = sorted([0.01, 0.02, 0.1, 0.4, 0.7, 1, 1.5, 2])[::-1]

    train = []
    val = []
    test = []
    for i in range(len(flavors_unique)):
        X = X_trainS[i]
        X_val = X_valS[i]
        clf = early_stoping(Ridge, {"alpha": alphas}, X, Y_trainS[i],
                            X_val, Y_valS[i], verbose=False)
        train.append(clf.predict(X))
        val.append(clf.predict(X_val))
        test.append(clf.predict(X_testS[i]))

    # The per-flavor lists are transposed before scoring, as official_score
    # is called on the transposed predictions and targets.
    print("training_score-->",official_score(matrix_transpose(train), matrix_transpose(Y_trainS)))
    print("validation_score-->",official_score(matrix_transpose(val), matrix_transpose(Y_valS)))

    # Negative predictions make no sense as counts; clamp them to zero.
    result = [0 if r < 0 else r for r in flatten(test)]

    predict = {}
    virtual_machine_sum = 0
    for f in flavors_unique:
        count = int(round(result[mapping_index[f]]))
        predict[f] = count
        virtual_machine_sum += count
    return predict, virtual_machine_sum
Exemplo n.º 2
0
def grid_search_cv_early_stoping(estimator,
                                 paramaters,
                                 X,
                                 y,
                                 X_val,
                                 y_val,
                                 is_shuffle=False,
                                 cv='full',
                                 scoring='score',
                                 random_state=None,
                                 verbose=False,
                                 return_parameter=False):
    """Exhaustively search a parameter grid and return the best fitted model.

    For every combination in the Cartesian product of ``paramaters`` values,
    an ``estimator(**combination)`` is cross-validated with
    ``cross_val_score`` on ``(X, y)``, refitted on the full ``(X, y)`` and
    scored with ``official_score`` on ``(X_val, y_val)``. The two scores are
    blended with fixed weights (currently 0 for cross-validation and 1 for
    the hold-out validation score) and the first combination with the
    highest blended score wins.

    Args:
        estimator: callable building a model from keyword parameters; the
            model must provide ``fit`` and ``predict``.
        paramaters: mapping of parameter name to an iterable of candidates.
        X, y: training data.
        X_val, y_val: hold-out validation data.
        is_shuffle, cv, random_state: forwarded to ``cross_val_score``.
        scoring: must be ``'score'``.
        verbose: print every candidate with its score, and the winner.
        return_parameter: also return the winning parameter dict.

    Returns:
        The best model, or ``(best_model, best_parameters)`` when
        ``return_parameter`` is true. Both are ``None`` if the grid yields
        no combination.
    """
    assert (scoring == 'score')

    from itertools import product

    names = list(paramaters.keys())
    candidates = list(paramaters.values())

    max_model = None
    max_parameter = None
    max_score = None
    for combination in product(*candidates):
        p = dict(zip(names, combination))
        clf = estimator(**p)
        score = cross_val_score(clf,
                                X,
                                y,
                                return_mean=True,
                                is_shuffle=is_shuffle,
                                cv=cv,
                                scoring=scoring,
                                random_state=random_state)

        # cross_val_score refits ``clf`` on fold subsets, so the full-data
        # fit has to happen afterwards; otherwise the validation score and
        # the returned model would come from the last fold's partial fit.
        clf.fit(X, y)
        score_val = official_score(y_val, clf.predict(X_val))

        # Blend weights: cross-validation score vs. hold-out score.
        score = ((0 / 3.0) * score + (3 / 3.0) * score_val)

        if verbose:
            print(p, score)

        if max_model is None or max_score < score:
            max_parameter = p
            max_score = score
            max_model = clf

    if verbose:
        print("max_parameter", max_parameter)

    if return_parameter:
        return max_model, max_parameter
    return max_model
Exemplo n.º 3
0
def cross_val_score(estimator_instance,
                    X,
                    y,
                    is_shuffle=False,
                    cv='full',
                    scoring='score',
                    random_state=None,
                    return_mean=False,
                    verbose=False):
    """Evaluate an estimator with K-fold cross-validation.

    The data is split into ``cv`` contiguous folds (or one fold per sample
    when ``cv == 'full'``). For each fold the estimator is refitted in place
    on the remaining samples and evaluated on the fold with both
    ``official_score`` and ``l2_loss``.

    Args:
        estimator_instance: model with ``fit`` and ``predict``. It is
            refitted for every fold, so after the call it holds the fit of
            the last fold's training subset.
        X, y: samples and targets; must support ``len`` and slicing.
        is_shuffle: shuffle ``X`` and ``y`` together before splitting.
        cv: number of folds (an int greater than 1 and smaller than
            ``len(X)``) or ``'full'``.
        scoring: ``'score'`` or ``'loss'`` -- selects which per-fold values
            are returned or aggregated.
        random_state: forwarded to ``shuffle``.
        return_mean: return one aggregated number instead of the per-fold
            list.
        verbose: accepted for interface compatibility; currently unused.

    Returns:
        With ``return_mean`` false, the list of per-fold scores or losses.
        With ``return_mean`` true, a robust aggregate: for scores
        ``(median + mean - 0.5 * std) / 2``, for losses
        ``(median + mean + std) / 2``, where "median" is the element at
        index ``len // 2`` of the sorted values.
    """
    assert ((isinstance(cv, int) and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')

    if isinstance(cv, int):
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)

    N = len(X)
    K = N if cv == 'full' else cv

    # Fractional fold width; boundaries are rounded so that fold sizes differ
    # by at most one sample.
    h = N / float(K)

    scores = []
    losses = []
    for i in range(K):
        s = int(round(i * h))
        e = int(round((i + 1) * h))

        # Train on everything outside [s, e), validate on [s, e).
        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])

        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        scores.append(official_score(p, Y_val))
        losses.append(l2_loss(p, Y_val))

    if not return_mean:
        return scores if scoring == 'score' else losses

    if scoring == 'score':
        std = sqrt(mean(square(minus(scores, mean(scores)))))
        # Floor division: a list index must be an int (true division yields
        # a float on Python 3 and raises TypeError).
        middle = sorted(scores)[len(scores) // 2]
        # Penalise unstable parameter choices by subtracting half the spread.
        return (middle + mean(scores) - 0.5 * std) / 2.0

    # scoring == 'loss': a larger spread makes the aggregate loss worse.
    std = sqrt(mean(square(minus(losses, mean(losses)))))
    middle = sorted(losses)[len(losses) // 2]
    return (middle + mean(losses) + std) / 2.0
Exemplo n.º 4
0
def early_stoping(estimator,
                  paramaters,
                  X,
                  y,
                  X_val,
                  Y_val,
                  scoring='score',
                  verbose=False):
    """Walk a parameter grid in order and stop once validation gets worse.

    Each combination in the Cartesian product of ``paramaters`` values is
    used to build ``estimator(**combination)``, which is fitted on
    ``(X, y)`` and evaluated on ``(X_val, Y_val)`` with ``official_score``
    and ``l2_loss``. As soon as a candidate has a higher loss or a lower
    score than the candidate immediately before it, the search stops and the
    best model found so far is returned.

    Args:
        estimator: callable building a model from keyword parameters; the
            model must provide ``fit`` and ``predict``.
        paramaters: mapping of parameter name to an ordered sequence of
            candidates; the order determines the search path.
        X, y: training data.
        X_val, Y_val: validation data.
        scoring: ``'score'`` (keep the highest score) or ``'loss'`` (keep the
            lowest loss).
        verbose: print the winning parameters when the whole grid was
            walked without stopping early.

    Returns:
        The best fitted model, or ``None`` if the grid yields no combination.
    """
    assert (scoring == 'score' or scoring == 'loss')

    from itertools import product

    names = list(paramaters.keys())
    candidates = list(paramaters.values())

    max_model = None
    max_parameter = None
    max_score = None
    min_loss = None

    last_score = None
    last_loss = None

    for combination in product(*candidates):
        p = dict(zip(names, combination))
        clf = estimator(**p)
        clf.fit(X, y)

        # Predict once and reuse the result for both metrics.
        prediction = clf.predict(X_val)
        score = official_score(Y_val, prediction)
        loss = l2_loss(Y_val, prediction)

        # Early stop: this candidate is worse than the previous one on
        # either metric, regardless of which metric selects the best model.
        if last_loss is not None and last_loss < loss:
            return max_model
        if last_score is not None and last_score > score:
            return max_model

        if scoring == "score":
            if max_model is None or max_score < score:
                max_parameter = p
                max_score = score
                max_model = clf
        else:
            # Select on the loss itself (not on the score) when minimising.
            if max_model is None or min_loss > loss:
                max_parameter = p
                min_loss = loss
                max_model = clf

        last_score = score
        last_loss = loss

    if verbose:
        print("max_parameter", max_parameter)

    return max_model
Exemplo n.º 5
0
 def score(self, X, y):
     """Return ``official_score`` of targets ``y`` against ``self.predict(X)``."""
     return official_score(y, self.predict(X))