Пример #1
0
    def train_ffm(self,
                  trainset_path,
                  testset_path,
                  model_save_path,
                  suffix=""):
        """Train an FFM model, save it, then run ``self.evaluation`` on it.

        Hyper-parameters are read from ``self.config_params[self.algo]`` (or
        from its nested ``"ffm"`` entry when one exists).  The ``"phase"`` and
        ``"model_cnt"`` entries are not forwarded to xlearn.

        Paths noted by the original author as typical values:
            self.config_params["workdir"] + "ffm/ffm_train_java.txt"
            self.config_params["workdir"] + "ffm/ffm_test_java.txt"
            self.config_params["workdir"] + "ffm/ffm.txt"

        :param trainset_path: path of the training data file
        :param testset_path: path of the data file used for validation
        :param model_save_path: prefix to which the model file names
            ``ffm_<channelid><suffix>.txt`` / ``.out`` are appended
        :param suffix: optional text placed right before the file extension
        :return: None
        """
        ffm_model = xl.create_ffm()  # Use field-aware factorization machine
        ffm_model.disableNorm()  # instance-wise normalization
        ffm_model.setTrain(trainset_path)  # Training data
        ffm_model.setValidate(testset_path)  # Validation data
        ffm_model.setSigmoid()

        section = self.config_params[self.algo]
        if "ffm" in section:
            section = section["ffm"]
        # Build a filtered copy rather than pop()-ing: the section is part of
        # the shared configuration, and removing keys in place would change
        # it for every later reader of self.config_params.
        param = {
            key: value
            for key, value in section.items()
            if key not in ("phase", "model_cnt")
        }

        channel_id = self.config_params["channelid"]
        model_txt_path = model_save_path + "ffm_{}{}.txt".format(
            channel_id, suffix)
        model_binary_path = model_save_path + "ffm_{}{}.out".format(
            channel_id, suffix)

        ffm_model.setTXTModel(model_txt_path)
        ffm_model.fit(param, model_binary_path)

        self.evaluation([model_binary_path], trainset_path, testset_path)
Пример #2
0
    def train_ffm(self, trainset_path, testset_path, model_save_path):
        """Fit an FFM model and dump its predictions for both data sets.

        Hyper-parameters come from
        ``self.config_params["xgboost_plus_ffm"]["ffm"]``.  The model is saved
        as ``model_save_path + "ffm.out"`` (binary) and
        ``model_save_path + "ffm.txt"`` (text).  Predictions for the training
        and validation files are written to ``ffm/train_predict.txt`` and
        ``ffm/test_predict.txt`` under the platform-specific work directory.

        :param trainset_path: path of the training data file
        :param testset_path: path of the validation data file
        :param model_save_path: prefix to which the model file names are
            appended
        :return: None
        """
        learner = xl.create_ffm()
        learner.disableNorm()
        learner.setTrain(trainset_path)
        learner.setValidate(testset_path)
        learner.setSigmoid()
        hyper_params = self.config_params["xgboost_plus_ffm"]["ffm"]

        text_model = model_save_path + "ffm.txt"
        binary_model = model_save_path + "ffm.out"

        learner.setTXTModel(text_model)
        learner.fit(hyper_params, binary_model)

        # Score the training file first, then the validation file.
        targets = (
            (trainset_path, "ffm/train_predict.txt"),
            (testset_path, "ffm/test_predict.txt"),
        )
        for data_path, relative_output in targets:
            learner.setTest(data_path)
            learner.predict(
                binary_model,
                self.config_params["workdir"][platform.system()] +
                relative_output)
Пример #3
0
    def evaluation(self, model_binary_path_list, trainset_path, testset_path):
        """Write train/test predictions for one or several saved FFM models.

        With a single model the outputs are ``ffm/train_predict.txt`` and
        ``ffm/test_predict.txt``; with several models each output name gets a
        1-based ``_<index>`` suffix.  All files are placed under the
        platform-specific work directory.

        :param model_binary_path_list: paths of saved binary models
        :param trainset_path: path of the training data file
        :param testset_path: path of the validation/test data file
        :return: None
        """
        scorer = xl.create_ffm()
        scorer.disableNorm()
        scorer.setTrain(trainset_path)
        scorer.setValidate(testset_path)
        scorer.setSigmoid()

        out_dir = self.config_params["workdir"][platform.system()]

        if not len(model_binary_path_list) > 1:
            # Single-model case: plain, un-numbered output names.
            scorer.setTest(trainset_path)
            scorer.predict(model_binary_path_list[0],
                           out_dir + "ffm/train_predict.txt")
            scorer.setTest(testset_path)
            scorer.predict(model_binary_path_list[0],
                           out_dir + "ffm/test_predict.txt")
            return

        for number, model_path in enumerate(model_binary_path_list, start=1):
            scorer.setTest(trainset_path)
            scorer.predict(model_path,
                           out_dir + "ffm/train_predict_{}.txt".format(number))
            scorer.setTest(testset_path)
            scorer.predict(model_path,
                           out_dir + "ffm/test_predict_{}.txt".format(number))
Пример #4
0
def ffm_predict():
    """Score ``encoded_test_file.ffm`` with the model ``model/model2.out``.

    Predictions are passed through a sigmoid and written to
    ``output/output2.res``.  On-disk mode is enabled on the model.
    """
    test_file = 'encoded_test_file.ffm'
    model_file = 'model/model2.out'
    result_file = 'output/output2.res'

    predictor = xl.create_ffm()
    predictor.setOnDisk()
    predictor.setTest(test_file)
    predictor.setSigmoid()

    predictor.predict(model_file, result_file)
def train(k, lam, lr, early, epoch, train_fname, valid_fname, test_fname,
          pred_fname):
    """Train an FFM regression model and write test-set predictions.

    The model is stored at ``/tmp/model.out``.

    :param k: value for the ``'k'`` hyper-parameter
    :param lam: value for the ``'lambda'`` hyper-parameter
    :param lr: value for the ``'lr'`` hyper-parameter
    :param early: when falsy, early stopping is disabled
    :param epoch: value for the ``'epoch'`` hyper-parameter
    :param train_fname: path of the training data file
    :param valid_fname: path of the validation data file
    :param test_fname: path of the test data file
    :param pred_fname: path the predictions are written to
    """
    hyper_params = {
        'task': 'reg',
        'lr': lr,
        'lambda': lam,
        'metric': 'rmse',
        'opt': 'adagrad',
        'k': k,
        'stop_window': 1,
        'epoch': epoch,
    }
    model_out = "/tmp/model.out"

    learner = xl.create_ffm()
    learner.setTrain(train_fname)
    learner.setValidate(valid_fname)
    learner.setTest(test_fname)
    learner.disableNorm()
    if not early:
        learner.disableEarlyStop()
    learner.fit(hyper_params, model_out)
    learner.predict(model_out, pred_fname)
Пример #6
0
def main():
    """Optionally preprocess the data, train an FFM model and predict.

    When ``datasetflag == 'all'`` the full data set is used (``data/`` as
    input, ``res/`` as result root); otherwise the small sample is used
    (``data_s/`` and ``res_s/``).  Results go to a fresh time-stamped
    ``FFM_<YYYYmmdd_HHMMSS>/`` directory under the result root.
    """
    use_full_data = datasetflag == 'all'
    raw_data_name = 'data.csv' if use_full_data else 'data_s.csv'
    inputpath = 'data/' if use_full_data else 'data_s/'
    result_root = 'res/' if use_full_data else 'res_s/'
    outputpath = (result_root + 'FFM_' +
                  datetime.now().strftime("%Y%m%d_%H%M%S") + '/')
    os.mkdir(outputpath)

    # Preprocess
    if preprocessflag == 'on':
        preprocess.preprocess(inputfilename=raw_data_name,
                              outputpath=inputpath)
    if FFM_preprocessflag == 'on':
        # preprocess.FFM_preprocess(inputpath)
        preprocess.convertion(inputpath)

    # Training
    model_file = outputpath + 'model.out'
    learner = xl.create_ffm()
    learner.setTrain(inputpath + 'train.txt')
    learner.setValidate(inputpath + 'evals.txt')
    learner.fit(parameter, model_file)
    # learner.cv(parameter)

    # Prediction (sigmoid is switched on only after training)
    learner.setTest(inputpath + 'test.txt')
    learner.setSigmoid()
    learner.predict(model_file, outputpath + 'res.txt')

    # turn txt to final result
Пример #7
0
 def FFMtrain(self):
     """Train a binary FFM model on ``./data/train_ffm.csv``.

     The fitted model is saved to ``./data/model.out``.
     """
     hyper_params = {
         'task': 'binary',
         'lr': 0.02,
         'lambda': 0.002,
         'metric': 'auc'
     }
     learner = xl.create_ffm()
     learner.setTrain("./data/train_ffm.csv")
     learner.fit(hyper_params, "./data/model.out")
Пример #8
0
def train_ffm(
        train_libffm_path, test_libffm_path, model_path, pred_path, param):
    """Train an FFM model, then write sigmoid-scaled test predictions.

    :param train_libffm_path: path of the training data file
    :param test_libffm_path: path of the test data file
    :param model_path: path the fitted model is saved to
    :param pred_path: path the predictions are written to
    :param param: hyper-parameter dict handed to ``fit``
    """
    print_log(f'Training ffm from {train_libffm_path}')

    learner = xl.create_ffm()
    learner.setTrain(train_libffm_path)
    learner.fit(param, model_path)

    # Prediction phase.
    learner.setSigmoid()
    learner.setTest(test_libffm_path)
    learner.predict(model_path, pred_path)
Пример #9
0
    def test(self):
        """Run prediction on ``self.test_data_path`` with the saved model.

        The model is loaded from ``<self.save_path>/ffm.model``.  No output
        file path is passed to ``predict``; its return value is kept only in
        a local variable and an empty line is printed afterwards.
        """
        model_file = os.path.join(self.save_path, 'ffm.model')

        predictor = xl.create_ffm()
        predictor.setTest(self.test_data_path)  # path of the test dataset
        predictor.setSigmoid()  # convert output to 0-1

        # NOTE(review): the result is never used or returned - presumably it
        # was meant to be printed or returned; confirm with the author.
        predictions = predictor.predict(model_file)
        print()
Пример #10
0
    def __init__(self, model_type):
        """Create the xlearn model that matches *model_type*.

        :param model_type: one of ``"FM"``, ``"FFM"`` or ``"linear"``
        """
        # Only these three model kinds are accepted.
        assert model_type in ["FM","FFM","linear"]
        self.model_type = model_type

        if self.model_type == "FFM":
            factory = xl.create_ffm
        elif self.model_type == "FM":
            factory = xl.create_fm
        else:
            # Fall back to the default linear model.
            factory = xl.create_linear
        self.model = factory()
Пример #11
0
def creat_model(model_type):
    """Create and return an xlearn model object.

    ``"FM"`` gives a factorization machine, ``"FFM"`` a field-aware
    factorization machine; any other value gives the default linear model.
    """
    if model_type == "FM":
        return xl.create_fm()
    if model_type == "FFM":
        return xl.create_ffm()
    # Any other value: use the default linear model.
    return xl.create_linear()
Пример #12
0
 def FFMtest(self):
     """Predict ``./data/test_ffm.csv`` with the saved FFM model.

     The model is read from ``./data/model.out`` and the sigmoid-scaled
     predictions are written to ``./data/output.txt``.
     """
     # The hyper-parameter dict that used to be built here was never passed
     # to xlearn (prediction only needs the saved model), so it was dropped.
     ffm_model = xl.create_ffm()
     ffm_model.setTest("./data/test_ffm.csv")
     ffm_model.setSigmoid()
     ffm_model.predict("./data/model.out", "./data/output.txt")
Пример #13
0
def done(op_type='istrain'):
    """Train, cross-validate, or predict with an FFM model.

    :param op_type: ``'istrain'`` trains on the one-hot train/validation
        files under ``FLAGS.tmp_data_path`` and saves ``ffm_model.out``;
        ``'cv'`` runs cross-validation on the training file; any other value
        predicts the test file and writes ``1-ffm_model.test.csv`` under
        ``FLAGS.out_data_path``.
    """
    ffm_model = xl.create_ffm()
    logging.debug('设置参数')
    if op_type == 'istrain':
        logging.debug("开始训练")
        ffm_model.setTrain(FLAGS.tmp_data_path + 'ont_hot_train.libffm.csv')
        ffm_model.setValidate(FLAGS.tmp_data_path + 'ont_hot_vali.libffm.csv')
        ffm_model.disableEarlyStop()
        ffm_model.fit(param, FLAGS.tmp_data_path + 'ffm_model.out')

        logging.debug("to save validation predictions ...")
        ffm_model.show()
        logging.debug(ffm_model)
    elif op_type == 'cv':
        logging.debug("开始 CV")
        ffm_model.setTrain(FLAGS.tmp_data_path + 'ont_hot_train.libffm.csv')
        ffm_model.cv(param)

        logging.debug("to save validation predictions ...")
        logging.debug(ffm_model)
    else:
        # Offline prediction.
        test_save = FLAGS.tmp_data_path + 'ont_hot_test.libffm.csv'
        output_path = FLAGS.tmp_data_path + 'output.txt'
        logging.debug("预测")
        ffm_model.setTest(test_save)  # Test data
        ffm_model.setSigmoid()  # Convert output to 0-1
        ffm_model.predict(FLAGS.tmp_data_path + 'ffm_model.out', output_path)
        ffm_model.show()
        # Use a context manager so the predictions file is always closed
        # (the handle used to be left open).
        with open(output_path, 'r') as fp:
            dtrain_predprob = fp.readlines()
        logging.debug(ffm_model)
        y_pred = [
            round(float(value.replace('\n', '')), 4)
            for value in dtrain_predprob
        ]
        logging.debug('-' * 30)
        y_pred = np.array(y_pred).reshape(-1, 1)
        logging.debug(y_pred.shape)
        test_id = pd.read_csv(FLAGS.test_id_path + 'test_id.csv')
        logging.debug(test_id['id'].shape)
        test_id['id'] = test_id['id'].map(int)
        test_id['click'] = y_pred
        test_id.to_csv(FLAGS.out_data_path + '1-' + 'ffm_model.test.csv',
                       index=False)
Пример #14
0
def createffm():
    """Train a binary FFM model and save it to ``./model.out``.

    Training data is ``train_path``; ``test_path`` is used for validation.
    """
    hyper_params = {
        'lambda': 0.00002,
        'lr': 0.05,
        'task': 'binary',
        'k': 8,
        'metric': 'auc'
    }

    learner = xl.create_ffm()
    # learner.setOnDisk()
    learner.setTrain(train_path)
    learner.setValidate(test_path)
    learner.fit(hyper_params, "./model.out")
Пример #15
0
    def eval_yyh(self, model_binary_path_list, n, testset_path):
        """Predict *testset_path* with the first model in the list.

        The output is written to ``ffm/test_predict_yyh_<n>.txt`` under the
        platform-specific work directory.

        :param model_binary_path_list: list of saved model paths; only the
            first entry is used
        :param n: number embedded in the output file name
        :param testset_path: path of the data file to score
        :return: None
        """
        scorer = xl.create_ffm()
        scorer.disableNorm()
        scorer.setValidate(testset_path)
        scorer.setSigmoid()

        out_dir = self.config_params["workdir"][platform.system()]
        # Only the test file is scored; no training file is set here.
        scorer.setTest(testset_path)
        scorer.predict(model_binary_path_list[0],
                       out_dir + "ffm/test_predict_yyh_{}.txt".format(n))
Пример #16
0
def ffm_setting():
    """Train a binary FFM model on the small sample files.

    Uses ``./small_train.txt`` for training and ``./small_test.txt`` for
    validation; the model is saved to ``./model.out``.
    """
    # Binary classification, learning rate 0.2, regularisation lambda 0.002.
    hyper_params = {'task': 'binary', 'lr': 0.2, 'lambda': 0.002}

    learner = xl.create_ffm()
    learner.setTrain("./small_train.txt")
    learner.setValidate("./small_test.txt")
    learner.fit(hyper_params, "./model.out")
Пример #17
0
    def train_ffm(self,
                  trainset_path,
                  testset_path,
                  model_save_path,
                  suffix=""):
        """Train an FFM model, then score copies of a test file that each
        have one field removed.

        Hyper-parameters are read from ``self.config_params[self.algo]`` (or
        from its nested ``"ffm"`` entry when one exists); ``"phase"`` and
        ``"model_cnt"`` are not forwarded to xlearn.  After training, for
        each field index 0..27 a copy of the fixed test file is written
        without the tokens starting with ``"<index>:"`` and passed to
        ``self.eval_yyh``.

        Paths noted by the original author as typical values:
            self.config_params["workdir"] + "ffm/ffm_train_java.txt"
            self.config_params["workdir"] + "ffm/ffm_test_java.txt"
            self.config_params["workdir"] + "ffm/ffm.txt"

        :param trainset_path: path of the training data file
        :param testset_path: path of the data file used for validation
        :param model_save_path: prefix to which ``ffm<suffix>.txt`` /
            ``ffm<suffix>.out`` are appended
        :param suffix: optional text placed right before the file extension
        :return: None
        """
        ffm_model = xl.create_ffm()  # Use field-aware factorization machine
        ffm_model.disableNorm()  # instance-wise normalization
        ffm_model.setTrain(trainset_path)  # Training data
        ffm_model.setValidate(testset_path)  # Validation data
        ffm_model.setSigmoid()

        section = self.config_params[self.algo]
        if "ffm" in section:
            section = section["ffm"]
        # Build a filtered copy rather than pop()-ing: the section is part of
        # the shared configuration and must not lose keys as a side effect.
        param = {
            key: value
            for key, value in section.items()
            if key not in ("phase", "model_cnt")
        }

        model_txt_path = model_save_path + "ffm{}.txt".format(suffix)
        model_binary_path = model_save_path + "ffm{}.out".format(suffix)

        ffm_model.setTXTModel(model_txt_path)
        ffm_model.fit(param, model_binary_path)

        # NOTE(review): hard-coded, machine-specific input path.
        with open(
                '/data/yangyuhan/workspace/ctr/hdfs_ctr/ffm/ffm_test_python.txt',
                "r",
                encoding="utf-8") as file_read:
            # Strip the newline before splitting.  Previously it stayed glued
            # to the last token, so dropping that token also dropped the line
            # break and merged two samples into one line.
            samples = [line.rstrip("\n").split(' ') for line in file_read]

        workdir = self.config_params["workdir"][platform.system()]
        for i in range(28):
            field = str(i) + ":"
            test_yyh_path = workdir + 'ffm/ffm_test_python_yyh_{}.txt'.format(i)
            with open(test_yyh_path, "w", encoding="utf-8") as file_write:
                for tokens in samples:
                    kept = [tok for tok in tokens if not tok.startswith(field)]
                    file_write.write(" ".join(kept) + "\n")
            self.eval_yyh([model_binary_path], i, test_yyh_path)
Пример #18
0
    def __init__(self,
                 URM_train,
                 train_svm_file_path,
                 approximate_recommender: BaseRecommender,
                 ICM_train=None,
                 UCM_train=None,
                 item_feature_fields=None,
                 user_feature_fields=None,
                 valid_svm_file_path=None,
                 max_items_to_predict=1000,
                 model_filename="model.out",
                 model_type="ffm",
                 temp_relative_folder="temp/",
                 verbose=True):
        """Set up field ids, file locations and the xlearn model.

        Field ids are laid out as: 0 for every user column, 1 for every item
        column, then the item-feature fields shifted by 2, then the
        user-feature fields shifted past the largest item-feature field.

        :param URM_train: matrix whose ``shape`` gives the number of users
            (axis 0) and items (axis 1)
        :param train_svm_file_path: training file handed to ``setTrain``
        :param approximate_recommender: stored as-is on the instance
        :param ICM_train: stored as-is on the instance
        :param UCM_train: stored as-is on the instance
        :param item_feature_fields: optional array of item-feature field ids
        :param user_feature_fields: optional array of user-feature field ids
        :param valid_svm_file_path: optional file handed to ``setValidate``
        :param max_items_to_predict: stored as-is on the instance
        :param model_filename: file name of the model inside the model folder
        :param model_type: ``"ffm"`` or ``"fm"``
        :param temp_relative_folder: temp folder relative to the data folder
        :param verbose: forwarded to the parent constructor
        :raises ValueError: if *model_type* is neither ``"ffm"`` nor ``"fm"``
        """
        self.ICM_train = ICM_train
        self.UCM_train = UCM_train
        user_fields = np.full(shape=URM_train.shape[0], fill_value=0)
        item_fields = np.full(shape=URM_train.shape[1], fill_value=1)

        # Collect only the blocks that actually exist.  Passing None to
        # np.concatenate (the previous behaviour with the default arguments)
        # raises, as does np.max(None) when only user fields are given.
        field_blocks = [user_fields, item_fields]
        next_free_field = 2
        if item_feature_fields is not None:
            item_feature_fields = np.asarray(item_feature_fields) + 2
            field_blocks.append(item_feature_fields)
            if item_feature_fields.size:
                next_free_field = np.max(item_feature_fields) + 1
        if user_feature_fields is not None:
            user_feature_fields = (np.asarray(user_feature_fields) +
                                   next_free_field)
            field_blocks.append(user_feature_fields)
        self.fields = np.concatenate(field_blocks)

        self.approximate_recommender = approximate_recommender
        self.max_items_to_predict = max_items_to_predict

        # Set path of temp folder and model_path
        root_path = get_project_root_path()
        fm_data_path = os.path.join(root_path, "resources", "ffm_data")
        self.temp_folder = os.path.join(fm_data_path, temp_relative_folder)
        self.model_folder = os.path.join(fm_data_path, "model")
        self.model_path = os.path.join(self.model_folder, model_filename)

        if model_type == "ffm":
            self.model = xl.create_ffm()
        elif model_type == "fm":
            self.model = xl.create_fm()
        else:
            raise ValueError(
                "model_type is inexistent, choose between ffm and fm")
        self.model.setTrain(train_svm_file_path)
        if valid_svm_file_path is not None:
            self.model.setValidate(valid_svm_file_path)

        super().__init__(URM_train, verbose)
Пример #19
0
 def predict(self, path_tst, model_path, xlfm_params=XLFM_PARAMS_DEFAULT):
     """Predict *path_tst* with a saved FFM model and write a submission CSV.

     xlearn first writes raw predictions to a time-stamped
     ``submission-<epoch>.csv`` under ``self.artifacts_dir``; the same file
     is then overwritten with an ``id``/``target`` table.

     :param path_tst: path of the test data file
     :param model_path: path of the saved model
     :param xlfm_params: accepted but not used by this method
     """
     submission_path = '%s/submission-%d.csv' % (self.artifacts_dir,
                                                 int(time()))
     scorer = xl.create_ffm()
     scorer.setTest(path_tst)
     scorer.setSigmoid()
     scorer.predict(model_path, submission_path)

     # Read the raw predictions back, one float per line.
     with open(submission_path) as raw_file:
         scores = [float(row.strip()) for row in raw_file]

     table = pd.DataFrame({'id': list(range(len(scores))), 'target': scores})
     table['target'] = table['target'].astype(np.float32)
     self.logger.info('yp mean %.3lf' % table['target'].mean())
     self.logger.info('%d rows' % len(table['target']))
     table.to_csv(submission_path, index=False)
     self.logger.info('Saved %s' % submission_path)
Пример #20
0
    def _train(self, params):
        """Cross-validate, fit and predict with an FFM model.

        Predictions are written to ``ffmoutput.txt`` in
        ``self.submission_dir``.

        :param params: hyper-parameter dict handed to xlearn
        :return: ``(cv_error, params)``; ``cv_error`` is always 0 for now
        """
        learner = xl.create_ffm()
        learner.setTrain(os.path.join(self.data_dir, self.train_name))

        print(params)
        learner.cv(params)

        learner.fit(params, os.path.join(self.model_dir, self.model_name))

        learner.setTest(os.path.join(self.data_dir, self.test_name))
        prediction_file = os.path.join(self.submission_dir, 'ffmoutput.txt')
        learner.predict(os.path.join(self.model_dir, self.model_name),
                        prediction_file)

        # Placeholder: waiting for a way to get the loss value out of xlearn.
        return 0, params
Пример #21
0
def xl_objective(params):
    """Objective function: train an FFM model and return ``1 - MAP@k``.

    The function trains on ``train_data_file``, predicts
    ``valid_data_file``, ranks coupons per user by predicted interest and
    scores the ranking with ``mapk`` against ``interactions_valid_dict``.
    It also increments the ``xl_objective.i`` call counter and overwrites
    ``df_preds['interest']``.

    :param params: hyper-parameter dict; it is modified in place
    :return: ``1 - score``
    """
    started = time()
    xl_objective.i += 1

    params['task'] = 'reg'
    params['metric'] = 'rmse'
    params['stop_window'] = 3
    # hyperopt hands these over as floats
    for int_key in ('epoch', 'k'):
        params[int_key] = int(params[int_key])

    learner = xl.create_ffm()
    learner.setTrain(train_data_file)
    # learner.setValidate(valid_data_file_opt)
    learner.setTest(valid_data_file)
    # learner.setQuiet()
    learner.fit(params, xlmodel_fname_tmp)
    learner.predict(xlmodel_fname_tmp, xlpreds_fname_tmp)

    df_preds['interest'] = np.loadtxt(xlpreds_fname_tmp)

    ordered = df_preds.sort_values(['user_id_hash', 'interest'],
                                   ascending=[False, False])
    per_user = ordered.groupby('user_id_hash')['coupon_id_hash'].apply(list)
    per_user = per_user.reset_index()
    recomendations_dict = pd.Series(per_user.coupon_id_hash.values,
                                    index=per_user.user_id_hash).to_dict()

    actual = []
    pred = []
    for user in recomendations_dict:
        actual.append(list(interactions_valid_dict[user]))
        pred.append(list(recomendations_dict[user]))

    score = mapk(actual, pred)
    end = round((time() - started) / 60., 2)

    print("INFO: iteration {} was completed in {} min. Score {:.3f}".format(xl_objective.i, end, score))

    return 1 - score
Пример #22
0
def predict():
    """Predict with the last saved FFM model and build a zipped submission.

    The model used is the last path, in sorted order, matching
    ``./modelFFM/xlModel_*.txt``.  Scores are written to the matching
    ``xlOutput_*`` file, copied into ``submission.csv`` and zipped into
    ``./modelFFM/submission.zip``.
    """
    scorer = xl.create_ffm()
    _, _, test_file = splitFFM()

    scorer.setTest(test_file)
    scorer.setSigmoid()

    folder = './modelFFM'
    candidates = sorted(glob.glob('./modelFFM/xlModel_*.txt'))
    model_file = candidates[-1]
    score_file = model_file.replace('Model', 'Output')
    scorer.predict(model_file, score_file)

    submission = getMerged('aid', 'uid', kind=2)
    submission['score'] = np.loadtxt(score_file)
    submission.to_csv('submission.csv', index=False)

    zip_name = '%s/submission.zip' % folder
    with zipfile.ZipFile(zip_name, 'w') as archive:
        archive.write('submission.csv', compress_type=zipfile.ZIP_DEFLATED)
Пример #23
0
def predict_behavior_type(variable):
    """Predict behaviour for new-user data and store a few result rows.

    The data is run through a saved XGB model to get leaf indices, which
    are then fed to a saved FFM model.  When ``variable == '1'`` the first
    two result rows go to ``predict_result_to_Bmob``; otherwise the first
    three rows replace the MySQL table ``predict_result_gbdt_java``.

    :param variable: forwarded to ``get_new_user_data``; also selects the
        output destination as described above
    """
    # Fetch the data
    test = get_new_user_data(variable=variable)
    # Predict the data
    # Final processing
    test = end_processing(test)
    # Load the XGB model
    # NOTE(review): hard-coded, machine-specific absolute path.
    XGB = joblib.load("C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\XGB_FFM.model")
    # Get the leaf-node data
    new_test = XGB.apply(test.values)
    # Convert the data to the DMatrix format that FFM needs
    new_test = xlearn.DMatrix(new_test)
    # Use the FFM model
    ffm_model = xlearn.create_ffm()
    ffm_model.setSign()
    ffm_model.setQuiet()
    ffm_model.setOnDisk()
    ffm_model.setTest(new_test)
    # This local has the same name as the enclosing function.
    predict_behavior_type = ffm_model.predict(
        "C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\model_dm.out")

    data_result = pd.DataFrame()
    data_result['user_id'] = test.user_id
    data_result['category_id'] = test.category_id
    data_result['item_id'] = test.item_id
    data_result['predict_result'] = predict_behavior_type
    # NOTE(review): this replaces every model prediction with a random 0/1,
    # so the rows kept below do not depend on the FFM output - confirm that
    # this is intentional (demo stub?) before relying on the results.
    data_result['predict_result'] = data_result['predict_result'].apply(lambda x: random.randint(0,1))
    data_result = data_result.loc[data_result.predict_result == 1]
    data_result['predict_result'] = connect_item_name(list(data_result['item_id']))
    if variable=='1':
        predict_result_to_Bmob(data_result[:2])
    else:
        # NOTE(review): database credentials are hard-coded in the URL.
        engine = create_engine("mysql+pymysql://root:123456@localhost:3306/mysql?charset=utf8")
        data_result = data_result[:3]
        data_result.to_sql(name='predict_result_gbdt_java', con=engine, if_exists='replace',
                           index=False, index_label=False, chunksize=5000,
                           dtype={
                               'user_id':VARCHAR(length=20),
                               'category_id':VARCHAR(length=20),
                               'item_id':VARCHAR(length=20),
                               'predict_result':VARCHAR(length=20)
                           })
Пример #24
0
    def fit(self,
            df,
            label,
            eva_df=None,
            eva_label=None,
            path='datasource/train.ffm',
            overwrite_path=True,
            eva_path='datasource/valid.ffm',
            model_path='datasource/ffm_model.out',
            overwrite_eva_path=True):
        """Encode the data to file, then train the configured xlearn model.

        ``self.model_type`` selects the model: ``'lr'`` -> linear,
        ``'fm'`` -> factorization machine, ``'ffm'`` -> field-aware
        factorization machine.  The indexes of *df*, *label* (and of the
        evaluation pair, when given) are reset in place to ``0..n-1``.

        :param df: training features
        :param label: training labels
        :param eva_df: optional evaluation features
        :param eva_label: optional evaluation labels; must be given exactly
            when *eva_df* is given
        :param path: file the encoded training data is written to
        :param overwrite_path: accepted but not used by this method
        :param eva_path: file the encoded evaluation data is written to
        :param model_path: file the trained model is saved to; also stored
            as ``self.model_path``
        :param overwrite_eva_path: accepted but not used by this method
        :raises Exception: if only one of *eva_df* / *eva_label* is given
        :raises ValueError: if ``self.model_type`` is not a known model type
        """
        if (eva_df is None) ^ (eva_label is None):
            raise Exception(
                'params eva_df, eva_label must be all None or all have value.')

        df.index = range(df.shape[0])
        label.index = range(label.shape[0])

        # 'lr' and 'ffm' used to be swapped: 'lr' built an FFM model and
        # 'ffm' built a linear one.
        if self.model_type == 'lr':
            self.clf = xl.create_linear()
        elif self.model_type == 'fm':
            self.clf = xl.create_fm()
        elif self.model_type == 'ffm':
            self.clf = xl.create_ffm()
        else:
            raise ValueError(self.model_type,
                             ' is an invalid value for param cat.')

        self.fe = FFMEncoder(df)
        self.fe.fit(df, self.cutoff)
        self.fe.transform(df, label, path)
        if eva_df is not None:
            eva_df.index = range(eva_df.shape[0])
            eva_label.index = range(eva_label.shape[0])
            self.fe.transform(eva_df, eva_label, eva_path)

        self.clf.setTrain(path)
        if eva_df is not None:
            self.clf.setValidate(eva_path)

        self.clf.fit(self.params, model_path)
        self.model_path = model_path
Пример #25
0
    def train(self):
        """Build an FFM model on MovieLens-100k and fit it if a save path
        is configured.

        When ``self.save_path`` is the empty string the model is returned
        without being fitted; otherwise it is fitted and saved to
        ``<self.save_path>/ffm.model``.

        :return: the xlearn FFM model object
        """
        learner = xl.create_ffm()
        learner.setTrain("../data/movie_lens_100k/train.txt")
        learner.setValidate("../data/movie_lens_100k/test.txt")

        # Binary classification, learning rate 0.2, regularisation lambda
        # 0.002, evaluation metric AUC.
        hyper_params = {
            'task': 'binary',
            'lr': 0.2,
            'lambda': 0.002,
            'metric': 'auc'
        }

        if self.save_path == '':
            return learner

        learner.fit(hyper_params, os.path.join(self.save_path, 'ffm.model'))
        return learner
Пример #26
0
def FFM():
    """Train a binary FFM model and predict the test file.

    Trains on ``Train_ffm.txt`` with ``Test_ffm.txt`` as validation set,
    saves the model to ``model.out`` and writes predictions for
    ``./Test_ffm.txt`` to ``./output.txt``.

    :return: whatever ``predict`` returns
    """
    hyper_params = {
        'task': 'binary',  # 'binary' for classification, 'reg' for regression
        'k': 4,  # size of latent factor
        'lr': 0.2,  # learning rate
        'opt': 'adagrad',
        'lambda': 0.00002,  # L2 regularisation parameter
        'metric': 'auc',  # metric monitored on the validation set
        'epoch': 100,  # maximum number of epochs
        'nthread': 8,
        'stop_window': 3
    }

    learner = xl.create_ffm()
    learner.setTrain("Train_ffm.txt")
    learner.setValidate("Test_ffm.txt")
    # learner.disableEarlyStop()
    # learner.setSigmoid()
    learner.fit(hyper_params, "model.out")

    learner.setTest("./Test_ffm.txt")
    return learner.predict("./model.out", "./output.txt")
Пример #27
0
def _FFM_train(df_all):
    """Train an FFM model and predict the day-7, cv and test files.

    :param df_all: 4-tuple ``(train_name, day7_name, cv_name, test_name)``
        of data file paths; the text after the last ``_`` in ``train_name``
        is used as suffix of the result file names
    :return: ``(predict_day7, predict_cv, predict_test)`` arrays read back
        from the prediction files under ``cache_pkl_path + 'FFM_day/'``
    """
    train_name, day7_name, cv_name, test_name = df_all
    num = train_name.rsplit('_', 1)[-1]

    learner = xl.create_ffm()
    learner.setTrain(train_name)
    learner.setValidate(cv_name)
    learner.setTest(day7_name)
    day7_result = cache_pkl_path + 'FFM_day/model_day7_' + num
    #    learner.disableEarlyStop()
    hyper_params = {
        'task': 'binary',
        'lr': 0.1,
        'lambda': 0.000005,
        'epoch': 70,
        'alpha': 0.1,
        'lambda_1': 0.01
    }
    learner.fit(hyper_params, "./model.out")

    def _predict_and_load(result_path):
        # Write predictions for the current test file, then read them back.
        learner.predict("./model.out", result_path)
        frame = pd.read_csv(result_path, header=None)
        return np.squeeze(frame.values)

    learner.setSigmoid()
    predict_day7 = _predict_and_load(day7_result)

    learner.setTest(cv_name)
    predict_cv = _predict_and_load(cache_pkl_path + 'FFM_day/model_cv__' + num)

    learner.setTest(test_name)
    predict_test = _predict_and_load(
        cache_pkl_path + 'FFM_day/model_test__' + num)

    return (predict_day7, predict_cv, predict_test)
Пример #28
0
def train():
    """Train a binary FFM model and save it under ``./modelFFM``.

    The first two values returned by ``splitFFM()`` are used as training
    and validation files.  The model file is named
    ``xlModel_<mmdd-HHMM>.txt`` after the current local time; the folder is
    created when missing.
    """
    learner = xl.create_ffm()
    train_file, valid_file, _ = splitFFM()

    learner.setTrain(train_file)
    learner.setValidate(valid_file)

    model_dir = './modelFFM'
    stamp = time.strftime('%m%d-%H%M', time.localtime(time.time()))
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    model_file = '%s/xlModel_%s.txt' % (model_dir, stamp)

    hyper_params = {
        'epoch': 100,
        'metric': 'auc',
        'task': 'binary',
        'k': 4,
        'lr': 0.02,
        'lambda': 1e-6,
        'stop_window': 3,
    }
    learner.fit(hyper_params, model_file)
Пример #29
0
def run_xlearn():
    """Train the xlearn model selected by the module-level settings.

    ``MODEL`` picks the model kind (``'LM'``, ``'FM'`` or ``'FFM'``);
    early stopping is disabled when ``WINDOW == 0``.  The model is saved to
    ``./xlearn.model``.
    """
    if MODEL == 'LM':
        factory = xl.create_linear
    elif MODEL == 'FM':
        factory = xl.create_fm
    else:
        assert MODEL == 'FFM'
        factory = xl.create_ffm
    learner = factory()

    learner.setTrain(TRAIN)
    learner.setValidate(TEST)
    if WINDOW == 0:
        learner.disableEarlyStop()

    hyper_params = {
        'task': TASK,
        'epoch': EPOCH,
        'opt': OPT,
        'metric': METRIC,
        'k': K,
        'lr': LEARNING_RATE,
        'lambda': LAMBDA,
    }
    learner.fit(hyper_params, './xlearn.model')
def train():
    """Train an FFM regression model from command-line paths and predict.

    ``sys.argv[2]``, ``sys.argv[3]`` and ``sys.argv[4]`` are the training,
    validation and test files.  The model is saved to ``./model.out`` and
    predictions are written to ``./xprediction.txt``.
    """
    model = xl.create_ffm()
    param = {
        'task': 'reg',
        'lr': 0.1,
        'lambda': 1,
        'metric': 'rmse',
        'opt': 'adagrad',
        'k': 256,
        'stop_window': 1,
        'init': 0.1,
        'epoch': 4
    }
    model.setTrain(sys.argv[2])
    model.setValidate(sys.argv[3])
    model.setTest(sys.argv[4])
    model.disableNorm()
    model.disableEarlyStop()
    # Call form of print: the bare statement form is a SyntaxError on
    # Python 3, while this prints the same text on Python 2 as well.
    print(param)
    model.fit(param, "./model.out")
    model.predict("./model.out", "./xprediction.txt")
Пример #31
0
#! /usr/bin/python3


import xlearn as xl

# Train a binary FFM model on the preliminary-contest data, then predict.
root_path = '../../tencent_dataset/preliminary_contest_data/'
model_file = root_path + 'ffm_model.out'

param = {
    'task': 'binary',
    'lr': 0.1,
    'lambda': 0.002,
    'epoch': 20,
    'metric': 'auc'
}

# Training
ffm_model = xl.create_ffm()
ffm_model.setTrain(root_path + 'train.ffm')
ffm_model.setValidate(root_path + 'valid.ffm')
ffm_model.fit(param, model_file)

# Prediction
ffm_model.setTest(root_path + 'pred.ffm')
ffm_model.predict(model_file, root_path + 'output.txt')
         'lambda': 0.002,
         'epoch': 100,
         'opt': 'adagrad'
         }

## training
# Fit the FM model with the current `param` dict; the model is saved to
# fp_model_fm.
fm_model.fit(param, fp_model_fm)

## testing
# Predict fp_test with the saved FM model and write the result to fp_pred_fm.
fm_model.setTest(fp_test)
fm_model.setSigmoid()
fm_model.predict(fp_model_fm, fp_pred_fm)

##================ FFM ==================##
## training setting
ffm_model = xl.create_ffm()  # Use field-aware factorization machine
ffm_model.setTrain(fp_train)   # Training data
ffm_model.setValidate(fp_valid)  # Validation data
ffm_model.setSigmoid()

# Hyper-parameters for the FFM run; this rebinds `param`.
param = {'task': 'binary',
         'k': 20,
         'lr': 0.02, 
         'lambda': 0.0001,
         'epoch': 100,
         'opt': 'adagrad'
         }

## Train model
# The fitted FFM model is saved to fp_model_ffm.
ffm_model.fit(param, fp_model_ffm)