Example No. 1
    def __init__(self,
                 ratings_file,
                 use_normalized_data=True,
                 start_from_zero=False,
                 learning_rate=0.001):
        init_logger(log_file='log/pmf.log', log_level=logging.INFO)
        self.exp_id = datetime.today().strftime('%Y%m%d%H%M%S')
        self.ratings_file = ratings_file
        self.load_data()
        self.obs_num = self.ratings_vector.shape[0]
        self.use_normalized_data = use_normalized_data
        self.start_from_zero = start_from_zero  # whether user_id and item_id in the data start from 0; Douban's do

        if self.use_normalized_data:
            self.generate_normalized_ratings()
        self.split_data()

        self.learning_rate = learning_rate
        self.epsilon = learning_rate  # learning rate
        self.lamb = 0.01  # regularization parameter
        self.momentum = 0.8
        self.max_epoch = 1000  # maximum number of iterations
        self.feat_num = 10  # number of latent features

        # user and item ids are taken from those that appear in the observations; how to split the data is also an open question
        self.user_num = self.ratings_vector[:, 0].max()  # assumes 1-based ids; with start_from_zero this undercounts by one
        self.item_num = self.ratings_vector[:, 1].max()

        self.U_shape = (self.user_num, self.feat_num)
        self.V_shape = (self.item_num, self.feat_num)

        # U: user feature matrix, V: item feature matrix, both drawn from a standard normal (Gaussian) distribution
        self.U = np.random.standard_normal(self.U_shape)
        self.V = np.random.standard_normal(self.V_shape)
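
The logging_util module itself does not appear in these examples. As a rough sketch only, an init_logger compatible with the keyword-style calls in Examples No. 1 and No. 5 might look like the following; the function body and format string are assumptions, not the original implementation (note that the log/ directory must already exist for FileHandler to succeed):

import logging

def init_logger(log_file, log_level=logging.INFO, print_console=False):
    # Log to the given file; optionally echo records to the console as well.
    handlers = [logging.FileHandler(log_file)]
    if print_console:
        handlers.append(logging.StreamHandler())
    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(levelname)s %(message)s',
                        handlers=handlers)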
Example No. 2
def set_logfile(config, args):
    log_filename = 'log/fmg_%s_%s_split%s.log' % (
        config['dt'], config['exp_type'], config['sn'])
    if config['exp_type'] == 'vary_mg':
        log_filename = 'log/fmg_%s_%s_split%s_reg%s.log' % (
            config['dt'], config['exp_type'], config['sn'], config['reg'])
    config['log_filename'] = log_filename
    init_logger('', config['log_filename'], logging.INFO, False)
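
Note that this call passes its arguments positionally, which implies a different logging_util variant from Example No. 1, roughly init_logger(name, log_file, log_level, print_console); that signature is inferred from the call sites here and in the later examples, not confirmed by the source.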
Example No. 3
def init_conifg(dt_arg, reg, exp_type, eps, K=10, F=10):
    global rating_filename
    global logger
    global exp_id
    global dt
    dt = dt_arg

    if dt == 'yelp':
        rating_filename = 'ratings_filter5'
    elif dt in ['yelp-200k', 'yelp-100k', 'yelp-50k', 'yelp-10k', 'yelp-5k',
                'douban', 'cikm-yelp',
                'amazon-200k', 'amazon-100k', 'amazon-50k', 'amazon-10k', 'amazon-5k']:
        rating_filename = 'ratings'
    elif dt == 'yelp-sample':
        rating_filename = ''
    elif dt in ['ml-100k', 'ml-1m', 'ml-10m']:
        rating_filename = '%s-rating' % dt
    elif dt == 'amazon-app':
        rating_filename = 'filter5_ratings_Apps_for_Android'

    if exp_type == 1:
        log_filename = 'log/%s_fm_glasso_once_reg%s_eps%s_K%s_F%s.log' % (dt, reg, eps, K, F)
    elif exp_type == 2:
        log_filename = 'log/%s_fm_glasso_regv%s_eps%s_K%s_F%s.log' % (dt, reg, eps, K, F)
    else:
        # without this guard, log_filename would be undefined below
        raise ValueError('unsupported exp_type: %s' % exp_type)

    exp_id = int(time.time())
    logger = init_logger('exp_%s' % exp_id, log_filename, logging.INFO, False)
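
For reference, a call consistent with the branches above might look like this (the argument values are illustrative, not taken from the source):

init_conifg('yelp-200k', reg=0.1, exp_type=1, eps=10, K=10, F=10)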
Example No. 4
def set_logfile(config, args):
    if config.get('reg'):
        if args.mg:
        motif = re.search(r'm\d', args.mg).group(0)  # extract the motif tag, e.g. 'm3'
            log_filename = 'log/%s_%s_%s_%s_reg%s.log' % (
                config['dt'], config.get('log_filename'), "glasso", motif,
                config.get('reg'))
        else:
            log_filename = 'log/%s_%s_%s_reg%s.log' % (
                config['dt'], config.get('log_filename'), "glasso",
                config.get('reg'))
    else:
        log_filename = 'log/%s_%s_%s_regW%s_regP%s.log' % (
            config['dt'], config.get('log_filename'), "glasso",
            config.get('reg_W'), config.get('reg_P'))
    config['log_filename'] = log_filename
    init_logger('exp_%s' % config['exp_id'], config['log_filename'],
                logging.INFO, False)
    logging.info('******\n%s\n******', config)
Example No. 5
import time
import logging
import random
from datetime import datetime

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing

from logging_util import init_logger

from rec_dal import RECDAL
from constant import LR_log_file, vali_output_path, test_output_path

dal = RECDAL()
init_logger(log_file=LR_log_file, log_level=logging.INFO, print_console=True)
# SQL for offline prediction
# NOTE: this first definition is immediately overwritten by the reduced feature set below, so it is effectively dead code
train_pos_sql = '''
            select l.user_id, l.item_id, s.looks, s.stores, s.carts, s.buys, s.total, s.l3d_looks, s.l3d_stores, s.l3d_carts, s.l3d_buys, s.l3d_total, s.lc_date_delta,
            s.y_looks, s.y_stores, s.y_carts, s.y_buys, s.y_total, s.item_total, s.item_l3d_total, s.item_yes_total, l.label
            from split_20141217_labels as l join split_20141217_stats as s
                on l.user_id=s.user_id and l.item_id=s.item_id
                where l.label = 1
         '''
train_pos_sql = '''
            select s.user_id, s.item_id, s.buys, s.l3d_buys, s.y_buys, s.total, s.l3d_total, s.y_total, s.item_total, s.item_l3d_total, s.item_yes_total, s.lc_date_delta, l.label
            from split_20141217_labels as l join split_20141217_stats as s
                on l.user_id=s.user_id and l.item_id=s.item_id
                where l.label = 1
         '''
train_neg_sql = '''
Example No. 6
import os
import time
import tempfile
import platform
import datetime
import argparse
from configparser import ConfigParser, ExtendedInterpolation
from typing import List, Dict, Tuple
import subprocess
import release_task_reader
from urllib.request import urlretrieve
from urllib.error import HTTPError
from release_task_reader import ReleaseTask
from installer_utils import PackagingError
from runner import exec_cmd, async_exec_cmd
from logging_util import init_logger

log = init_logger(__name__, debug_mode=False)
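# NOTE: yet another init_logger interface, apparently (name, debug_mode) returning a logger; inferred from this call alone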
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d--%H:%M:%S')


class QtRepositoryLayout:

    def __init__(self, root_path: str, license: str, repo_domain: str) -> None:
        self.root_path = root_path
        self.license = license
        self.repo_domain = repo_domain
        self.pending = "pending"
        self.staging = "staging"
        self.production = "production"
        # <root_path>/<license>/<pending|staging|production>/<repo_domain>/
        # /data/online_repositories/opensource/pending|staging|production/qtsdkrepository/
        self.base_repo_path = os.path.join(self.root_path, self.license)
Example No. 7
def run_all_epinions():
    for path_str in ['ratings_only']:
        run(path_str)
    for path_str in [
            'UUB_m1', 'UUB_m2', 'UUB_m3', 'UUB_m4', 'UUB_m5', 'UUB_m6',
            'UUB_m7'
    ]:
        for n in range(11):
            alpha = n * 0.1
            path_str1 = '%s_%s' % (path_str, alpha)
            print('run for', path_str1)
            run(path_str1)


if __name__ == '__main__':
    if len(sys.argv) == 3:
        dt = sys.argv[1]
        split_num = sys.argv[2]
        dir_ = 'data/%s/exp_split/%s/' % (dt, split_num)
        log_filename = 'log/%s_mf_feature_generation_split%s.log' % (dt, split_num)
        exp_id = int(time.time())
        logger = init_logger('exp_%s' % exp_id, log_filename, logging.INFO,
                             False)
        run_all_epinions()
    else:
        print('please specify the data and path_str')
        sys.exit(1)
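
Invoked from the shell with a dataset name and split number, for example (the script filename here is hypothetical):

python mf_features_generation.py epinions 1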
Example No. 8
    with open('data/yelp/samples/grid_res_%s' % filename, 'w') as fw:
        fw.write('\n'.join(
            ['%s\t%s\t%s\t%s\t%s' % (f, k, e, l, r) for f, k, e, l, r in res]))


if __name__ == '__main__':
    if len(sys.argv) == 3:
        dt = sys.argv[1]

        dir_ = 'data/%s/' % dt
        exp_id = int(time.time())
        log_filename = 'log/%s_mf.log' % dt
        logger = init_logger('exp_%s' % exp_id, log_filename,
                             logging.INFO, False)
        if int(sys.argv[2]) == 1:
            filename = dir_ + 'ratings.txt'
            K = 10
            eps = 10
            lamb = 1
            max_iter = 500
            exp_rmses = []
            for i in range(1):  # a single run; widen the range to repeat the experiment
                exp_rmses.append(
                    run_basedline(filename,
                                  K,
                                  eps,
                                  lamb,
                                  max_iter,
                                  silent_run=False))