Exemplo n.º 1
0
    # Fit a logistic-regression classifier (L2 penalty, C=0.1) on the training
    # data; train_X_lr and train_y are built earlier in this function, above
    # the part of the file shown here.
    lr = LogisticRegression(C=0.1, penalty='l2')
    lr.fit(train_X_lr, train_y)

    # Disabled manual smoke test: vectorise three hand-written feature dicts
    # and print their predicted probabilities. The non-ASCII keys are feature
    # names exactly as they would be passed to the vectorizer (they appear to
    # be country / cast / director names), so they are kept verbatim.
    # test_predict = vectorizer.transform([{'尼泊尔': 1},
    #                                      {'赵本山': 1, '赵薇': 1, '张曼玉': 1, 'rat': '8.0',
    #                                                   'ravg': 3.85714,
    #                                                   'rcount': 7.0,
    #                                                   'rmax': 5.0,
    #                                                   'rmedian': 4.0,
    #                                                   'rmin': 2.0,
    #                                                   'rsum': 27.0},
    #                                      {'克里斯·派恩': 1, '扎克瑞·昆图': 1, '佐伊·索尔达娜': 1,'西蒙·佩吉':1, '安东·叶利钦':1, '林诣彬':1 ,
    #                                                  '美国':1,
    #                                                   'rat': '8.0',
    #                                                   'ravg': 3.85714,
    #                                                   'rcount': 7.0,
    #                                                   'rmax': 5.0,
    #                                                   'rmedian': 4.0,
    #                                                   'rmin': 2.0,
    #                                                   'rsum': 27.0}])
    # print(lr.predict_proba(test_predict))

    # Score X_predict and pass the per-class probabilities, converted to
    # nested Python lists, to update_lr_rat together with df_data.
    lr_predict_final = lr.predict_proba(X_predict)
    update_lr_rat(df_data, lr_predict_final.tolist())
    # Print the class labels; predict_proba's columns follow this order.
    print(lr.classes_)


if __name__ == '__main__':
    # Script entry point: hand process_task to the scheduler helper.
    # NOTE(review): the first argument (10) is presumably the run interval;
    # its unit is not visible here — confirm against sched_util.schedule_.
    sched_util.schedule_(10, task_func=process_task)
Exemplo n.º 2
0
    # Wall-clock start, used for the elapsed-time print at the end.
    start_time = datetime.datetime.now()

    # NOTE(review): a connection is obtained here and again two lines below.
    # This first one is never used before `conn` is rebound, and nothing in
    # view closes it — looks like a leftover; confirm and remove.
    conn = common.get_connection()
    df_comment_new_data_ldim = get_comment_data()
    conn = common.get_connection()
    # One pass per comment row.
    # NOTE(review): `i` comes from .index (labels) but is used with .iloc
    # (positions); the two only agree when the frame has the default 0..n-1
    # index — confirm what get_comment_data() returns.
    for i in df_comment_new_data_ldim.index:
        print(df_comment_new_data_ldim.iloc[i]['MOVIEID'],
              df_comment_new_data_ldim.iloc[i]['USERID'])
        # Look up the candidate list for the movie this comment refers to.
        ibmovie_list = get_ibmovie_by_movieid(
            df_comment_new_data_ldim.iloc[i]['MOVIEID'], conn)
        for j in ibmovie_list:
            # j[0] is passed as the candidate movie; skip it when
            # exist_in_comment reports a match for this user, otherwise
            # insert/update the recommendation with the source movie id.
            is_exist = exist_in_comment(
                j[0], df_comment_new_data_ldim.iloc[i]['USERID'], conn)
            if is_exist:
                print('exist in comment')
            else:
                insert_or_update_recmovie(
                    j[0], df_comment_new_data_ldim.iloc[i]['USERID'],
                    df_comment_new_data_ldim.iloc[i]['MOVIEID'], conn)
        # The per-row flag update below is currently disabled.
        #update_comment_new_data_flag(df_comment_new_data_ldim.iloc[i]['ID'], conn)
        # Commit once per comment row, after all of its candidates are handled.
        conn.commit()

    # Report elapsed time and a completion timestamp.
    # NOTE(review): `conn` is not closed anywhere in the visible code.
    end_time = datetime.datetime.now()
    print(end_time - start_time)
    print('finish process movie based recall task:' +
          str(datetime.datetime.now()))


if __name__ == '__main__':
    # Script entry point: hand func_main to the scheduler helper.
    # NOTE(review): 60 is presumably the run interval; its unit is not
    # visible here — confirm against sched_util.schedule_.
    sched_util.schedule_(60, func_main)
Exemplo n.º 3
0
                                                     conn)
    # Read the test-file path template from config, then fill both LR path
    # templates with time_now_str using %-formatting (the train template is
    # fetched just above).
    test_file_lr_base_url = cfg.get_config_property('test_file_lr_t_url', conn)
    train_file_lr = train_file_lr_base_url % time_now_str
    test_file_lr = test_file_lr_base_url % time_now_str

    # Convert to libsvm-format data
    dump_svmlight_file(train_X_lr, train_y_lr_, train_file_lr)
    dump_svmlight_file(test_X_lr, test_y_lr_, test_file_lr)

    # Record the FM and LR train/test file paths back into config under the
    # '*_url' keys (train_file_fm / test_file_fm are set earlier, out of view).
    cfg.set_config_property(train_file_fm, 'train_file_fm_url', conn)
    cfg.set_config_property(test_file_fm, 'test_file_fm_url', conn)
    cfg.set_config_property(train_file_lr, 'train_file_lr_url', conn)
    cfg.set_config_property(test_file_lr, 'test_file_lr_url', conn)

    # Pickle `v` to the path stored under the 'dict2vec' config key
    # (presumably the fitted dict vectorizer — confirm where `v` is built).
    dict2vec_save_url = cfg.get_config_property('dict2vec', conn)
    with open(dict2vec_save_url, 'wb') as f:
        pkl.dump(v, f)

    # Pickle `scaler` to the path stored under the 'scaler' config key.
    scaler_save_url = cfg.get_config_property('scaler', conn)
    with open(scaler_save_url, 'wb') as f:
        pkl.dump(scaler, f)

    # Report elapsed time (start_time is set earlier, out of view) and a
    # completion timestamp.
    end_time = datetime.datetime.now()
    print(end_time - start_time)
    print('finish process comment to libsvm task:' +
          str(datetime.datetime.now()))


if __name__ == '__main__':
    # Script entry point: hand process_task to the scheduler helper.
    # NOTE(review): 60 is presumably the run interval; its unit is not
    # visible here — confirm against sched_util.schedule_.
    sched_util.schedule_(60, process_task)
    # NOTE(review): these lines look like the body of a separate function
    # whose `def` line is missing; as written they sit at the indentation of
    # the preceding `__main__` guard and would run as part of it — confirm.
    # Wall-clock start, used for the elapsed-time print at the end.
    start_time = datetime.datetime.now()

    # NOTE(review): a connection is obtained here and again two lines below.
    # This first one is never used before `conn` is rebound, and nothing in
    # view closes it — looks like a leftover; confirm and remove.
    conn = common.get_connection()
    df_comment_new_data_ldim = get_comment_data()
    conn = common.get_connection()
    # One pass per comment row.
    # NOTE(review): `i` comes from .index (labels) but is used with .iloc
    # (positions); the two only agree when the frame has the default 0..n-1
    # index — confirm what get_comment_data() returns.
    for i in df_comment_new_data_ldim.index:
        print(df_comment_new_data_ldim.iloc[i]['MOVIEID'],
              df_comment_new_data_ldim.iloc[i]['USERID'])
        # Look up the candidate list for the movie this comment refers to.
        ibmovie_list = get_ibmovie_by_movieid(
            df_comment_new_data_ldim.iloc[i]['MOVIEID'], conn)
        for j in ibmovie_list:
            # j[0] is passed as the candidate movie; skip it when
            # exist_in_comment reports a match for this user, otherwise
            # insert/update the recommendation with the source movie id.
            is_exist = exist_in_comment(
                j[0], df_comment_new_data_ldim.iloc[i]['USERID'], conn)
            if is_exist:
                print('exist in comment')
            else:
                insert_or_update_recmovie(
                    j[0], df_comment_new_data_ldim.iloc[i]['USERID'],
                    df_comment_new_data_ldim.iloc[i]['MOVIEID'], conn)
        # The per-row flag update below is currently disabled.
        #update_comment_new_data_flag(df_comment_new_data_ldim.iloc[i]['ID'], conn)
        # Commit once per comment row, after all of its candidates are handled.
        conn.commit()

    # Report elapsed time and a completion timestamp.
    # NOTE(review): `conn` is not closed anywhere in the visible code.
    end_time = datetime.datetime.now()
    print(end_time - start_time)
    print('finish process movie based recall task:' +
          str(datetime.datetime.now()))


if __name__ == '__main__':
    # Script entry point: hand func_main to the scheduler helper.
    # NOTE(review): 10 * 60 * 1000 = 600000, which reads like ten minutes in
    # milliseconds; the unit schedule_ expects is not visible here — confirm.
    sched_util.schedule_(10 * 60 * 1000, func_main)