Exemplo n.º 1
0
def factorization(
    model_path='models/simple_factorization',
    simple=True,
    use_item_data=True,
    item_data_output='data/model_data/item_data.csv',
    model_params={'nmf': False,
                  'verbose': False}
):
    """Train a ranking factorization recommender and save it.

    Relies on ``data_params``, which is neither a parameter nor a local of
    this function and must therefore be resolvable from an enclosing scope.
    It is expanded into ``get.model_data`` and its ``'output_path'`` entry
    names the CSV that is read back as the interaction data.

    Args:
        model_path: destination handed to ``save_model``.
        simple: when true, only the ``user_id`` and ``item_id`` columns of
            the interaction data are kept.
        use_item_data: when true, ``get.item_data`` is called with
            ``item_data_output`` and the CSV at that path is passed to the
            recommender as ``item_data``.
        item_data_output: path of the item-data CSV.
        model_params: extra keyword arguments forwarded to the
            recommender's ``create`` call (read only, never mutated here).

    Returns:
        None. The model is saved and an info message is logged.
    """
    get.model_data(**data_params)
    interactions = gl.SFrame.read_csv(data_params['output_path'])
    if simple:
        interactions = interactions[['user_id', 'item_id']]

    if not use_item_data:
        model = gl.recommender.ranking_factorization_recommender.create(
            interactions, **model_params)
    else:
        get.item_data(item_data_output)
        side_data = gl.SFrame.read_csv(item_data_output)
        model = gl.recommender.ranking_factorization_recommender.create(
            interactions, item_data=side_data, **model_params)

    save_model(model_path, model)
    log.info('saved model as %s' % model_path)
Exemplo n.º 2
0
def item_similarity(data,
                    model_path='models/item_similarity',
                    model_params={'similarity_type': 'jaccard',
                                  'verbose': False}
):
    """Train an item similarity recommender and return it.

    Relies on ``data_params``, which is neither a parameter nor a local of
    this function and must therefore be resolvable from an enclosing scope.
    It is expanded into ``get.model_data`` and its ``'output_path'`` entry
    names the CSV that is read back as the interaction data.

    Args:
        data: not used by this function.
        model_path: not used by this function; the model is returned, not
            saved.
        model_params: keyword arguments forwarded to the recommender's
            ``create`` call (read only, never mutated here).

    Returns:
        The model produced by ``item_similarity_recommender.create``.
    """
    get.model_data(**data_params)
    csv_path = data_params['output_path']
    # Only the user/item pairs are fed to the recommender.
    pairs = gl.SFrame.read_csv(csv_path)[['user_id', 'item_id']]
    return gl.recommender.item_similarity_recommender.create(
        pairs, **model_params)
Exemplo n.º 3
0
def main():
    """Build, save, sample and evaluate an item similarity model.

    Steps:
        1. Fetch model data via ``get.model_data``, train an item
           similarity recommender on its ``user_id``/``item_id`` columns
           and save it to ``MODEL_PATH``.
        2. Produce recommendations for a fixed list of user ids with
           ``check_recs``.
        3. Split ``user_beers`` by user, train a second recommender on the
           training part and evaluate precision/recall on the test part.

    Returns:
        A ``(recommends, pr)`` tuple: the ``check_recs`` result and the
        output of ``evaluate(..., metric='precision_recall')``.
    """
    # --- train and persist the model ---
    trades = get.model_data(
        sparse=4,
        outlier=10000,
        iso_rate=0,
        proposed_rate=0,
        traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH,
    )
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    model = gl.recommender.item_similarity_recommender.create(interactions)
    save_model(MODEL_PATH, model)

    # --- sample recommendations for hand-picked users ---
    ft, user_beers, beers = get.rec_data(
        FTISO_PATH, TRADE_PATH, TRADE_ITEM_PATH, BEERS_PATH)
    sample_users = [3381, 14239, 6601, 8958, 1440, 880, 57]
    recommends = check_recs(sample_users, model, ft, user_beers, beers)

    # --- precision/recall on a per-user train/test split ---
    train, test = gl.recommender.util.random_split_by_user(
        gl.SFrame(user_beers))
    holdout_model = gl.recommender.item_similarity_recommender.create(train)
    pr = holdout_model.evaluate(test, metric='precision_recall')
    return recommends, pr
Exemplo n.º 4
0
def load_rec_data(ftiso_path='../data/raw_data/ftiso.csv',
                  trade_path='../data/raw_data/trades.csv',
                  trade_item_path='../data/raw_data/trade_items.csv'):
    """Load the frames needed to inspect recommendations.

    The path arguments were previously accepted but ignored in favour of
    hard-coded literals. They are now honoured, and default to those same
    literals so that existing calls (including calls with no arguments)
    keep reading the same files.

    Args:
        ftiso_path: headerless CSV read into the ``ftiso`` frame and also
            forwarded to ``get.model_data``.
        trade_path: forwarded to ``get.model_data`` as ``trade_path``.
        trade_item_path: forwarded to ``get.model_data`` as
            ``trade_item_path``.

    Returns:
        A ``(ft, user_beers, beers)`` tuple: the rows of the ftiso data
        whose ``Type`` is ``'ft'``, the ``user_id``/``item_id`` columns of
        ``../data/model_data/all_user_beers``, and the contents of
        ``../data/raw_data/beers.csv``.
    """
    ftiso_columns = [
        'ID',
        'Beer ID',
        'Quantity',
        'Cellar Quantity',
        'User ID',
        'Type',
        'Accessible List',
        'Created',
        'Modified',
    ]
    ftiso = pd.read_csv(ftiso_path, header=None, names=ftiso_columns)
    ft = ftiso[ftiso['Type'] == 'ft']

    # The return value is not needed here, so it is no longer bound to an
    # unused local. The call itself is kept.
    # NOTE(review): presumably it (re)generates the all_user_beers file
    # read below -- confirm against get.model_data.
    get.model_data(sparse=0,
                   outlier=10000000,
                   iso_rate=1,
                   proposed_rate=1,
                   traded_rate=1,
                   trade_path=trade_path,
                   trade_item_path=trade_item_path,
                   ftiso_path=ftiso_path)

    user_beers = pd.read_csv(
        '../data/model_data/all_user_beers')[['user_id', 'item_id']]
    beers = pd.read_csv('../data/raw_data/beers.csv')

    return ft, user_beers, beers
Exemplo n.º 5
0
def _load_data():
    """Run ``get.model_data`` once per model type.

    Three calls are made, in order, with output paths ``IS_DF_PATH``
    (item similarity), ``NMF_DF_PATH`` (nmf) and ``POP_DF_PATH``
    (popularity). They differ only in ``sparse``, ``outlier`` and
    ``output_path``; every other argument is shared.

    NOTE(review): presumably each call writes a CSV to its
    ``output_path`` -- confirm against get.model_data.
    """
    # Arguments common to all three exports.
    shared = dict(
        iso_rate=0,
        proposed_rate=0,
        traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH,
    )

    # item similarity model csv
    get.model_data(sparse=4, outlier=10000, output_path=IS_DF_PATH, **shared)

    # nmf model csv
    get.model_data(sparse=3, outlier=500, output_path=NMF_DF_PATH, **shared)

    # popularity model csv
    get.model_data(sparse=0, outlier=500, output_path=POP_DF_PATH, **shared)
Exemplo n.º 6
0
def _load_data():
    """Run ``get.model_data`` once per model type.

    The item similarity, nmf and popularity exports are requested in that
    order, targeting ``IS_DF_PATH``, ``NMF_DF_PATH`` and ``POP_DF_PATH``.
    Only ``sparse``, ``outlier`` and ``output_path`` vary between them.

    NOTE(review): presumably each call writes a CSV to its
    ``output_path`` -- confirm against get.model_data.
    """
    def export(sparse, outlier, output_path):
        # Traded-only settings and input paths are the same for every export.
        get.model_data(
            sparse=sparse,
            outlier=outlier,
            iso_rate=0,
            proposed_rate=0,
            traded_rate=1,
            trade_path=TRADE_PATH,
            trade_item_path=TRADE_ITEM_PATH,
            ftiso_path=FTISO_PATH,
            output_path=output_path,
        )

    export(4, 10000, IS_DF_PATH)   # item similarity model csv
    export(3, 500, NMF_DF_PATH)    # nmf model csv
    export(0, 500, POP_DF_PATH)    # popularity model csv
Exemplo n.º 7
0
def main():
    """Train an item similarity model and save it to ``MODEL_PATH``.

    Only the ``user_id`` and ``item_id`` columns of the data returned by
    ``get.model_data`` are used; no item side data is passed to the
    recommender.
    """
    trades = get.model_data(
        sparse=4,
        outlier=10000,
        iso_rate=0,
        proposed_rate=0,
        traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH,
    )
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    save_model(
        MODEL_PATH,
        gl.recommender.item_similarity_recommender.create(interactions),
    )
Exemplo n.º 8
0
def build_nmf_model():
    """Train a ranking factorization recommender with item side data.

    The interaction data is the ``user_id``/``item_id`` columns returned
    by ``get.model_data``; the side data comes from ``get.item_data()``.
    Despite the function name, ``nmf=False`` is what gets passed to the
    recommender.

    Returns:
        The trained model.
    """
    trades = get.model_data(
        sparse=4,
        outlier=500,
        iso_rate=0,
        proposed_rate=0,
        traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH,
    )
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    side_data = gl.SFrame(get.item_data())
    return gl.recommender.ranking_factorization_recommender.create(
        interactions, item_data=side_data, nmf=False)
Exemplo n.º 9
0
def build_nmf_model():
    """Train a ranking factorization recommender and sample its output.

    The model is trained on the ``user_id``/``item_id`` columns returned
    by ``get.model_data`` plus the side data from ``get.item_data()``,
    with ``nmf=False``. Recommendations for a fixed list of user ids are
    then produced through ``check.recs``.

    Returns:
        A ``(model, recommends)`` tuple.
    """
    trades = get.model_data(
        sparse=4,
        outlier=500,
        iso_rate=0,
        proposed_rate=0,
        traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH,
    )
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    side_data = gl.SFrame(get.item_data())

    model = gl.recommender.ranking_factorization_recommender.create(
        interactions,
        item_data=side_data,
        nmf=False,
    )

    # Spot-check recommendations for a hand-picked set of users.
    sample_users = [3381, 14239, 6601, 8958, 1440, 880, 57]
    ft, user_beers, beers = check.load_rec_data()
    recommends = check.recs(sample_users, model, ft, user_beers, beers)

    # Precision/recall evaluation is currently disabled, so no `pr` value
    # is returned.
    return model, recommends
Exemplo n.º 10
0
def main():
    get.model_data(**data_params)
    mat = gl.SFrame.read_csv(data_params['output_path'])