Example #1
def sk_save_model(model):
    # persist the fitted estimator into the temp model directory that the
    # MLSQL runtime exposes via internalSystemParam.tempModelLocalPath
    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp["tempModelLocalPath"] if "tempModelLocalPath" in isp else "/tmp/"
    dir_name = tempModelLocalPath
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    with open(os.path.join(dir_name, "model.pickle"), "wb") as f:
        pickle.dump(model, f, protocol=2)
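On the predict side the pickled estimator can simply be loaded back from that directory. A minimal sketch, assuming the same model.pickle file name; local_model_path is a placeholder for whatever directory the runtime hands to the predict script:

import os
import pickle

def sk_load_model(local_model_path):
    # local_model_path: directory containing the model.pickle written above
    with open(os.path.join(local_model_path, "model.pickle"), "rb") as f:
        return pickle.load(f)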
Example #2
def sk_save_model(model):
    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp[
        "tempModelLocalPath"] if "tempModelLocalPath" in isp else "/tmp/"
    dir_name = tempModelLocalPath
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    with open(os.path.join(dir_name, "model.pickle"), "wb") as f:
        pickle.dump(model, f, protocol=2)
Example #3
def sklearn_batch_data(fn):
    # stream the training data from the MLSQL runtime in mini-batches and
    # hand every non-empty batch to fn(X, y, label_size)
    rd = mlsql.read_data()
    fitParams = mlsql.params()["fitParam"]
    batch_size = int(mlsql.get_param(fitParams, "batchSize", 1000))
    label_size = int(mlsql.get_param(fitParams, "labelSize", -1))
    x_name = mlsql.get_param(fitParams, "inputCol", "features")
    y_name = mlsql.get_param(fitParams, "label", "label")
    for items in rd(max_records=batch_size):
        if len(items) == 0:
            continue
        X = [item[x_name].toArray() for item in items]
        y = [item[y_name] for item in items]
        fn(X, y, label_size)
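sklearn_batch_data only drives the callback; the callback decides how to learn from each batch. A minimal sketch of pairing it with an incremental learner (SGDClassifier, and the assumption that labelSize was actually set, are purely illustrative):

from sklearn.linear_model import SGDClassifier

clf = SGDClassifier()

def fit_batch(X, y, label_size):
    # the full label set must be known, at least on the first partial_fit call
    clf.partial_fit(X, y, classes=list(range(label_size)))

sklearn_batch_data(fit_batch)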
Example #4
def sklearn_all_data():
    # materialise the whole training set in memory instead of batching
    rd = mlsql.read_data()
    fitParams = mlsql.params()["fitParam"]
    X = []
    y = []
    x_name = fitParams["inputCol"] if "inputCol" in fitParams else "features"
    y_name = fitParams["label"] if "label" in fitParams else "label"
    debug = "debug" in fitParams and bool(fitParams["debug"])
    counter = 0
    for items in rd(max_records=1000):
        item_size = len(items)
        if debug:
            counter += item_size
            print("{} collect data from kafka:{}".format(fitParams["alg"], counter))
        if item_size == 0:
            continue
        X = X + [item[x_name].toArray() for item in items]
        y = y + [item[y_name] for item in items]
    return X, y
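The all-at-once variant suits estimators that need the complete dataset in a single fit call; a short sketch (the estimator choice is only an illustration, and it reuses the save helper from Example #1):

from sklearn.ensemble import RandomForestClassifier

X, y = sklearn_all_data()
clf = RandomForestClassifier()
clf.fit(X, y)
sk_save_model(clf)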
Example #5
def sklearn_configure_params(clf):
    # copy any fitParam entries that match estimator hyper-parameters onto clf,
    # casting each supplied value to the type of the estimator's default value
    fitParams = mlsql.params()["fitParam"]

    def t(v, convert_v):
        if type(v) == float:
            return float(convert_v)
        elif type(v) == int:
            return int(convert_v)
        elif type(v) == list:
            # the supplied value may arrive as a comma separated string
            items = convert_v.split(",") if type(convert_v) == str else convert_v
            if type(v[0]) == int:
                return [int(i) for i in items]
            if type(v[0]) == float:
                return [float(i) for i in items]
            return items
        else:
            return convert_v

    for name in clf.get_params():
        if name in fitParams:
            dv = clf.get_params()[name]
            setattr(clf, name, t(dv, fitParams[name]))
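Because it only touches hyper-parameters the estimator already exposes through get_params(), the helper can be applied to any scikit-learn estimator before training; a brief sketch (the estimator and the surrounding X, y are assumptions):

from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier()
sklearn_configure_params(clf)  # e.g. n_estimators or learning_rate supplied via fitParam
clf.fit(X, y)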
Example #6
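This snippet starts partway through a TensorFlow 1.x training script: its first line is still indented because in the original file it sits inside a name scope, and it relies on a graph built earlier. A minimal sketch of what that earlier part presumably looks like (the feature size, layer shape, optimizer and two-class output are all assumptions; only the names input_x, input_y, xent, train_step, global_step, accurate, sess and rd are taken from the fragment below):

import sys
import tensorflow as tf
import mlsql
import mlsql_model

rd = mlsql.read_data()

feature_size = 100  # placeholder; the real value would come from fitParam or the data
input_x = tf.placeholder(tf.float32, [None, feature_size], name="input_x")
input_y = tf.placeholder(tf.float32, [None, 2], name="input_y")

logits = tf.layers.dense(input_x, 2)
xent = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=input_y, logits=logits))

global_step = tf.train.get_or_create_global_step()
train_step = tf.train.AdamOptimizer(0.01).minimize(xent, global_step=global_step)

correct = tf.equal(tf.argmax(logits, 1), tf.argmax(input_y, 1))
accurate = tf.reduce_mean(tf.cast(correct, tf.float32))

sess = tf.Session()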
    tf.summary.scalar("accuracy", accurate)

summ = tf.summary.merge_all()

sess.run(tf.global_variables_initializer())


def trans(i):
    # one-hot encode the binary label
    if i == 0:
        return [0, 1]
    if i == 1:
        return [1, 0]


for items in rd(max_records=2):
    X = [item["features"].toArray() for item in items]
    Y = [trans(item["label"]) for item in items]
    if len(X) > 0:
        _, gs = sess.run([train_step, global_step],
                         feed_dict={input_x: X, input_y: Y})
        [train_accuracy, s, loss] = sess.run([accurate, summ, xent],
                                             feed_dict={input_x: X, input_y: Y})
        print('train_accuracy %g, loss: %g, global step: %d' % (
            train_accuracy,
            loss,
            gs))
    sys.stdout.flush()
p = mlsql.params()
mlsql_model.save_model(p["internalSystemParam"]["tempModelLocalPath"], sess, input_x, input_y, True)
sess.close()
Example #7

if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp["tempModelLocalPath"]
    if not os.path.exists(tempModelLocalPath):
        os.makedirs(tempModelLocalPath)
    with open(tempModelLocalPath + "/model.pkl", "wb") as f:
        pickle.dump(lr, f)
Example #8
def eval_metrics(actual, pred):
    # standard regression metrics; assumes mean_squared_error, mean_absolute_error
    # and r2_score are imported from sklearn.metrics
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp["tempModelLocalPath"]
    if not os.path.exists(tempModelLocalPath):
        os.makedirs(tempModelLocalPath)
    with open(tempModelLocalPath + "/model.pkl", "wb") as f:
        pickle.dump(lr, f)
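With eval_metrics in scope, the held-out split from the same script can be scored after training; a purely illustrative addition, since the original snippet stops at pickling the model:

predicted = lr.predict(test_x)
rmse, mae, r2 = eval_metrics(test_y, predicted)
print("ElasticNet(alpha=%f, l1_ratio=%f): RMSE=%f MAE=%f R2=%f"
      % (alpha, l1_ratio, rmse, mae, r2))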
Example #9
def param(key, value):
    # read a value from fitParams, falling back to the supplied default
    if key in mlsql.params()["fitParams"]:
        res = mlsql.params()["fitParams"][key]
    else:
        res = value
    return res
import tensorflow as tf
import mlsql_model
import mlsql
import sys
import mlsql_tf

rd = mlsql.read_data()
p = mlsql.params()

fitParams = p["fitParam"]

tf.reset_default_graph()
config = tf.ConfigProto()

gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1))
featureSize = int(mlsql.get_param(fitParams, "featureSize", -1))
wordEmbeddingSize = int(mlsql.get_param(fitParams, "wordEmbeddingSize", -1))
sequenceLen = featureSize // wordEmbeddingSize  # integer number of tokens per example

label_size = int(mlsql.get_param(fitParams, "labelSize", -1))
layer_group = [
    int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")
]

print_interval = int(mlsql.get_param(fitParams, "printInterval", 1))

window_group = [
    int(i)
    for i in mlsql.get_param(fitParams, "windowGroup", "5,10,15").split(",")
]
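Several snippets call mlsql.get_param(params, key, default); judging from how it is used it behaves like a plain dictionary lookup with a fallback. A hypothetical equivalent, shown only to make the examples self-explanatory (this is not the library's actual source):

def get_param(params, key, default):
    # return params[key] if present, otherwise the supplied default
    return params[key] if key in params else default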
Example #11
def udf(func):
    # write the serialised function to the path where the MLSQL runtime expects it
    import mlsql
    p = mlsql.params()
    func_path = p["systemParam"]["funcPath"]
    write_binary_file(func_path, func)
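udf relies on a write_binary_file helper defined elsewhere in the project; judging from the call site it just dumps bytes to a path. A minimal sketch under that assumption:

def write_binary_file(path, content):
    # content is expected to already be bytes (a serialised function)
    with open(path, "wb") as f:
        f.write(content)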
Example #12
def param(key, value):
    if key in mlsql.params()["fitParams"]:
        res = mlsql.params()["fitParams"][key]
    else:
        res = value
    return res
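A one-line usage illustration for param (the key name and default are made up):

batch_size = int(param("batchSize", "32"))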