示例#1
0
def demo_xlearn_0():
    """Fit a binary FM classifier on the module-level split and print its predictions."""
    hyper = dict(task='binary', init=0.1, epoch=100, k=16, lr=0.1,
                 reg_lambda=0.01, opt='sgd', n_jobs=4, metric='auc',
                 stop_window=10)  # log=str(BASE_DIR)+'/xlearn.log'
    model = xl.FMModel(**hyper)
    model.fit(X_tr, y_tr, eval_set=[X_te, y_te], is_lock_free=False)
    # Generate predictions
    predictions = model.predict(X_te)
    print(predictions, type(predictions))
 def train(X, y):
     """Fit a binary FM classifier on (X, y), print its weights and return it."""
     settings = {'task': 'binary', 'init': 0.1, 'epoch': 10, 'k': 4,
                 'lr': 0.1, 'reg_lambda': 0.01, 'opt': 'sgd', 'metric': 'auc'}
     fm = xl.FMModel(**settings)
     fm.fit(X, y)
     # print model weights
     print('====>>>> weights of FM-Model: {}'.format(fm.weights))
     return fm
示例#3
0
def train_model(train_X, valid_X, train_y, valid_y):
    """Train an FM model and print its AUC on the validation split."""
    clf = xl.FMModel(lr=0.02, reg_lambda=0.001, k=18, epoch=10, stop_window=4)
    clf.fit(train_X, train_y)

    scores = clf.predict(valid_X)

    # ROC curve on the hold-out set, reduced to a single AUC number.
    fpr, tpr, thresholds = roc_curve(valid_y, np.array(scores))
    aucs = auc(fpr, tpr)
    print(aucs)
示例#4
0
    def create_fm_model(cls,
                        train_x,
                        train_y,
                        dev_x=None,
                        dev_y=None,
                        model_output=None,
                        iterations=100,
                        thread_count=4,
                        task='binary',
                        k=16,
                        lr=0.1,
                        metric='auc',
                        stop_window=100):
        '''
        Factorization-machine model builder.
        :param train_x:         training-set features
        :param train_y:         training-set labels
        :param dev_x:           validation-set features (optional)
        :param dev_y:           validation-set labels (optional)
        :param iterations:      number of training epochs
        :param model_output:    model save path (currently unused here)
        :param thread_count:    number of CPU threads
        :param task:            task type ('binary', 'reg', ...)
        :param k:               latent-vector dimension
        :param lr:              learning rate
        :param metric:          evaluation metric
        :param stop_window:     early-stop window
        :return:                the fitted xl.FMModel
        '''
        fm_model = xl.FMModel(task=task,
                              init=0.1,
                              k=k,
                              lr=lr,
                              reg_lambda=0.01,
                              epoch=iterations,
                              opt='sgd',
                              n_jobs=thread_count,
                              metric=metric,
                              stop_window=stop_window)
        # FIX: `if dev_x and dev_y:` raises "truth value of an array is
        # ambiguous" for numpy arrays / DataFrames and would also skip
        # evaluation for an empty-but-provided dev set; test for None instead.
        if dev_x is not None and dev_y is not None:
            fm_model.fit(train_x,
                         train_y,
                         eval_set=[dev_x, dev_y],
                         is_lock_free=False)
        else:
            fm_model.fit(train_x, train_y, is_lock_free=False)

        return fm_model
示例#5
0
def FM(X_train, y_train, X_val, y_val):
    """Fit a binary FM with adagrad and return predictions on the validation set."""
    # k = latent factor size
    settings = {'task': 'binary', 'epoch': 100, 'lr': 0.05,
                'reg_lambda': 0.00002, 'k': 40, 'opt': 'adagrad',
                'nthread': 8, 'metric': 'auc'}
    model = xl.FMModel(**settings)

    model.fit(X_train, y_train,
              eval_set=[X_val, y_val],
              is_instance_norm=True)

    return model.predict(X_val)
示例#6
0
ffm_model.setTrain(dataTrain)
ffm_model.setValidate(dataTest)

param={'task':'binary',
       'lr':0.2,
       'lambda':0.002,
       'metric':'acc'}

ffm_model.fit(param,"./model.out")
ffm_model.setTest(dataTest)
ffm_model.predict("./model.out","model./output.txt")
'''
# Binary FM scored with plain accuracy.
linear_model = xl.FMModel(task='binary', lr=0.2, k=10, init=0.1,
                          epoch=500, reg_lambda=0.002, metric='acc')

linear_model.fit(dataTrain, labelTrain, eval_set=[dataTest, labelTest])

y_pred = list(linear_model.predict(dataTest))
print(y_pred)

# Count false negatives: score below 0.5 while the true label is 1.
error = 0
item = 0
for idx, score in enumerate(y_pred):
    item += 1
    if score < 0.5 and labelTest[idx] == 1:
        error += 1
示例#7
0
X_val,      \
y_train,    \
y_val = train_test_split(X, y, test_size=0.2, random_state=0)

# Standardize input
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_val = normalizer.transform(X_val)

# FM hyper-parameters:
#   binary classification, init scale 0.1, 10 epochs (auto early-stop),
#   4 latent factors, learning rate 0.1, lambda 0.01, SGD, accuracy metric.
fm_model = xl.FMModel(task='binary', init=0.1, epoch=10, k=4,
                      lr=0.1, reg_lambda=0.01, opt='sgd', metric='acc')

# Start to train
fm_model.fit(X_train, y_train, eval_set=[X_val, y_val])

# Generate predictions
y_pred = fm_model.predict(X_val)
示例#8
0
# y = (iris_data['target'] == 2)
#
# X_train,X_val,y_train,y_val = train_test_split(X, y, test_size=0.3, random_state=0)

# FIX: the hyper-parameter dict {'task': 'binary', 'lr': 0.2, 'lambda': 0.002,
# 'metric': 'acc'} used to be built and then silently ignored
# (`model = xl.FMModel()`); pass the settings to the constructor instead.
# The native-API key 'lambda' maps to `reg_lambda` in the sklearn-style API.
model = xl.FMModel(task='binary', lr=0.2, reg_lambda=0.002, metric='acc')

# Quick sanity fit and hold-out evaluation.
model.fit(X_train, y_train)
preds = model.predict(X_test)
print("FMModel ROC AUC:%.3f" % roc_auc_score(y_true=y_test, y_score=preds))
print("FMModel accuracy_scorer:%.3f" %
      accuracy_score(y_true=y_test, y_pred=preds))

# Start to train
# Re-fit with an eval set before producing the submission predictions.
model.fit(X_train, y_train, eval_set=[X_test, y_test], is_lock_free=False)

y_submission = model.predict(X_submission)

# Assemble the Kaggle-style submission frame.
y_pred = pd.DataFrame()
y_pred["PassengerId"] = data_test["PassengerId"]
y_pred["Survived"] = y_submission
示例#9
0
# Hyper-parameters: binary FM, init scale 0.1, 5 epochs with a 2-round
# early-stop window, learning rate 0.2, lambda 0.1, SGD.
fm_settings = dict(model_type='fm',
                   task='binary',
                   metric='auc',
                   block_size=500,
                   lr=0.2,
                   k=4,
                   reg_lambda=0.1,
                   init=0.1,
                   fold=5,
                   epoch=5,
                   stop_window=2,
                   opt='sgd',
                   nthread=None,
                   n_jobs=4,
                   alpha=1,
                   beta=1,
                   lambda_1=1,
                   lambda_2=1)
fm_model = xl.FMModel(**fm_settings)

# Start to train
fm_model.fit(X_train, y_train, eval_set=[X_val, y_val], is_lock_free=False)

# Generate predictions
y_pred = fm_model.predict(X_val)
print(y_pred)
示例#10
0
File: fm.py  Project: ankane/xlearn
import xlearn as xl
import pandas as pd

# Regression FM fed from an in-memory DataFrame.
df = pd.read_csv('test/support/data.txt',
                 sep=' ',
                 names=['y', 'x0', 'x1', 'x2', 'x3'])

X, y = df.drop(columns=['y']), df['y']

model = xl.FMModel(task='reg', nthread=1, opt='adagrad')
model.fit(X, y)
print('weights', model.weights)
print('predict', model.predict(X)[0:6].tolist())

# Same model class fed directly from libsvm-style text and from CSV paths.
for source, label in (('test/support/data.txt', 'predict txt'),
                      ('test/support/data.csv', 'predict csv')):
    model = xl.FMModel(task='reg', nthread=1, opt='adagrad')
    model.fit(source)
    print(label, model.predict(source)[0:6].tolist())
示例#11
0
xTest = xTest.fillna(-999)

# Label-encode the categorical columns of the final test frame.
# NOTE(review): `le.fit_transform` refits the encoder per column, so the codes
# are not guaranteed to match any encoding applied to the training data — confirm.
# FIX: removed the no-op `else: pass` branch and normalized the 8-space
# loop indentation to the standard 4 spaces.
for column_name in DataTestFin.columns:
    if DataTestFin[column_name].dtype.name == 'category':
        DataTestFin.loc[:, column_name] = le.fit_transform(
            DataTestFin[column_name].astype(str))

DataTestFin = DataTestFin.fillna(-999)

# Binary FM evaluated by AUC.
ffm_model = xl.FMModel(task='binary',
                       lr=0.001,
                       epoch=100,
                       reg_lambda=0.01,
                       metric='auc')

print("Fitting Model...")

# Start to train
# Directly use string to specify data source
ffm_model.fit(xTrain, yTrain, eval_set=[xTest, yTest])

# print model weights
#print(ffm_model.weights)

print("Generating predictions")
# Generate predictions
y_pred = ffm_model.predict(DataTestFin)
示例#12
0
from KmmtML.Utils.Util import Util
import numpy as np
import pandas as pd

# Training task
ffm_model = xl.create_ffm()                # Use field-aware factorization machine (ffm)
ffm_model.setTrain("./small_train.txt")    # Set the path of training data

fm_model = xl.create_fm()
# NOTE(review): the native-API fit() expects (param, model_path); this no-arg
# call looks like it would raise — confirm intent.
fm_model.fit()
# parameter:
#  0. task: binary classification
#  1. learning rate : 0.2
#  2. regular lambda : 0.002
# NOTE(review): `param` below is built but never passed to any fit() call.
param = {'task':'binary', 'lr':0.2, 'lambda':0.002}

path = "/Users/jianjun.yue/PycharmGItHub/data/titanic/train_pre.csv"
data = pd.read_csv(path)
print("--------------RandomForestClassifier---------------")
predictors = ["Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare_scaler", "Embarked", "NameLength"]
train = data[predictors]

# Convert the selected columns to libsvm-format text via the project helper.
dt= Util.CSV2Libsvm(train)

print(dt)

# NOTE(review): this FMModel instance is constructed and immediately discarded.
xl.FMModel(task='binary', init=0.1,
                          epoch=10, lr=0.1,
                          reg_lambda=1.0, opt='sgd')
fm=xl.FMModel()
# NOTE(review): the sklearn-style fit() requires training data; a no-arg call
# looks like a TypeError — confirm.
fm.fit()
示例#13
0
File: test3.py  Project: hyusak/KmmtML
import xlearn as xl
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load dataset; binary target = "is this sample class 2?"
iris_data = load_iris()
X = iris_data['data']
y = (iris_data['target'] == 2)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Default-configured factorization machine.
fm = xl.FMModel()

# Start to train
fm.fit(X_train, y_train, eval_set=[X_val, y_val], is_lock_free=False)

# Generate predictions
y_pred = fm.predict(X_val)
print(y_pred)
print("------------------------")
print(y_val)
示例#14
0
    # Sampling-approach branches that need no extra preparation here.
    elif sam_apr_name == "pop":
        pass
    elif sam_apr_name == 'top_dis_pop':
        pass

    # Choose the regression algorithm by configured name.
    assert args.algo['name'] in ["FM", "xlearn_FM"]
    if args.algo['name'] == "FM":
        algo = factorization_machine.FMRegression(rank=8,
                                                  n_iter=100,
                                                  l2_reg_w=0.1,
                                                  l2_reg_V=0.1)
    if args.algo['name'] == "xlearn_FM":
        algo = xl.FMModel(task='reg',
                          init=0.1,
                          epoch=10,
                          k=4,
                          lr=0.2,
                          reg_lambda=0.01,
                          opt='sgd',
                          metric='mae')

    # Drop a stray carriage-return entry (Windows line endings in the config).
    if '\r' in args.features:
        args.features.remove('\r')
    features = args.features

    print('------loading data------')
    cd = ConstructData(dataset=args.dataset,
                       features=features,
                       test=args.test_flag,
                       sampling_approach=args.sampling_approach,
                       negative_ratio=args.negative_ratio,
                       n_clusters=args.n_clusters)
示例#15
0
def model(train_df=None, test_df=None, not_used_cols=None):
    """Train a two-level StackNet regressor and return test predictions.

    :param train_df: training DataFrame; must contain a 'date' column and a
                     'totals_transactionRevenue' target column
    :param test_df: DataFrame to score
    :param not_used_cols: columns to drop before modelling
    :return: predictions for ``test_df``

    NOTE(review): the ``None`` defaults would break ``sort_values``/``drop``
    below, so callers must always pass all three arguments — confirm.
    """
    logger.info('Start prepare model')
    train_df = train_df.sort_values('date')

    # Build feature matrices and the target. Columns in `not_used_cols`
    # are excluded; the test frame may lack some of them, hence the filter.
    X = train_df.drop(not_used_cols, axis=1)
    y = train_df['totals_transactionRevenue']
    X_test = test_df.drop(
        [col for col in not_used_cols if col in test_df.columns], axis=1)

    ## Model
    # FIX: removed dead code — an unused `folds = KFold(...)` object, an
    # unused lightgbm `params` dict, and stale commented-out split/tuning code.
    logger.info('Start tuning model')

    # Two-level stack: tree ensembles, gradient boosting, a factorization
    # machine and a ridge baseline on level one, restacked into a random
    # forest on level two.
    models = [
        ######## First level ########
        [
            RandomForestRegressor(n_estimators=500, random_state=1, n_jobs=-1),
            ExtraTreesRegressor(n_estimators=500, random_state=1, n_jobs=-1),
            LGBMRegressor(n_jobs=-1,
                          random_state=56,
                          objective='regression',
                          max_depth=8,
                          min_child_samples=40,
                          reg_alpha=0.4,
                          reg_lambda=0.1,
                          num_leaves=290,
                          learning_rate=0.01,
                          subsample=0.8,
                          colsample_bytree=0.9,
                          n_estimators=520),
            xl.FMModel(task='reg',
                       metric='rmse',
                       block_size=800,
                       lr=0.05,
                       k=12,
                       reg_lambda=0.05,
                       init=0.1,
                       fold=1,
                       epoch=50,
                       stop_window=5,
                       opt='ftrl',
                       nthread=0,
                       n_jobs=-1,
                       alpha=0.05,
                       beta=1,
                       lambda_1=0.1,
                       lambda_2=0.1),
            Ridge(random_state=1)
        ],
        ######## Second level ########
        [RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)]
    ]

    model = StackNetRegressor(models,
                              metric="rmse",
                              folds=3,
                              restacking=True,
                              use_retraining=True,
                              random_state=12345,
                              n_jobs=1,
                              verbose=1)

    model.fit(X, y)
    prediction = model.predict(X_test)

    return prediction