def load_test_data(score_domain=(1.0, 5.0, 0.5)):
    """
    Load the sample file ``pci.event`` into an event-with-score container.

    Parameters
    ----------
    score_domain : tuple, default=(1.0, 5.0, 0.5)
        forwarded unchanged as the ``score_domain`` keyword of ``set_event``
        (presumably (min, max, increment) -- confirm against kamrecsys)

    Returns
    -------
    data : EventWithScoreData
        container filled with the events and scores read from the file
    x : structured array
        raw records parsed from the file, with fields ``event`` and ``score``
    """
    from kamrecsys.data import EventWithScoreData
    from kamrecsys.datasets import SAMPLE_PATH

    # one record = a pair of 18-character unicode strings plus a float score
    record_dtype = np.dtype([('event', 'U18', 2), ('score', float)])
    records = np.genfromtxt(
        fname=os.path.join(SAMPLE_PATH, 'pci.event'),
        delimiter='\t',
        dtype=record_dtype)

    dataset = EventWithScoreData(n_otypes=2, event_otypes=np.array([0, 1]))
    dataset.set_event(
        records['event'], records['score'], score_domain=score_domain)
    return dataset, records
def load_test_data(score_domain=(1.0, 5.0, 0.5)):
    """
    Read the tab-separated sample ``pci.event`` and wrap it as event data.

    Parameters
    ----------
    score_domain : tuple, default=(1.0, 5.0, 0.5)
        handed to ``EventWithScoreData.set_event`` as ``score_domain``

    Returns
    -------
    data : EventWithScoreData
        object on which ``set_event`` has been called with the file contents
    x : structured array
        the parsed file; field ``event`` holds two strings per row and field
        ``score`` holds one float per row
    """
    from kamrecsys.data import EventWithScoreData
    from kamrecsys.datasets import SAMPLE_PATH

    sample_file = os.path.join(SAMPLE_PATH, 'pci.event')
    layout = np.dtype([('event', 'U18', 2), ('score', float)])
    parsed = np.genfromtxt(fname=sample_file, delimiter='\t', dtype=layout)

    # two object types, one per column of the ``event`` field
    otypes = np.array([0, 1])
    container = EventWithScoreData(n_otypes=2, event_otypes=otypes)
    events, scores = parsed['event'], parsed['score']
    container.set_event(events, scores, score_domain=score_domain)
    return container, parsed
# Example #3
# 0
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Fit a score predictor on one fold and record run statistics in ``opt``.

    Parameters
    ----------
    opt : namespace of parsed command line options
        read through attributes (``domain``, ``fold``, ``C``, ``k``, ``tol``,
        ``maxiter``, ``rseed``) and tested with ``in``; timing and loss
        attributes are created or updated on it as a side effect
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        index of the current fold; selects the slot written in the per-fold
        lists stored on ``opt``

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # build the event data; an all-zero ``opt.domain`` means "derive the
    # score domain from the observed scores"
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if not np.all(opt.domain == [0, 0, 0]):
        score_domain = tuple(opt.domain)
    else:
        lowest = np.min(tsc)
        highest = np.max(tsc)
        # smallest gap between distinct score values
        increment = np.min(np.diff(np.unique(tsc)))
        score_domain = (lowest, highest, increment)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # lazily create the per-fold result lists, one slot for each fold
    per_fold_defaults = (
        ('training_start_time', 0),
        ('training_end_time', 0),
        ('learning_i_loss', np.inf),
        ('learning_f_loss', np.inf),
        ('learning_opt_outputs', None),
    )
    for attr_name, placeholder in per_fold_defaults:
        if attr_name not in opt:
            setattr(opt, attr_name, [placeholder] * opt.fold)

    # wall-clock and user-CPU time before fitting
    began = datetime.datetime.now()
    began_cpu = os.times()[0]
    began_iso = began.isoformat()
    opt.training_start_time[fold] = began_iso
    logger.info("training_start_time = " + began_iso)

    # build and fit the model
    hyper_params = dict(
        C=opt.C,
        k=opt.k,
        tol=opt.tol,
        maxiter=opt.maxiter,
        random_state=opt.rseed)
    rcmdr = EventScorePredictor(**hyper_params)
    rcmdr.fit(data)

    # wall-clock and user-CPU time after fitting
    finished = datetime.datetime.now()
    finished_cpu = os.times()[0]
    wall_delta = finished - began
    cpu_delta = finished_cpu - began_cpu
    finished_iso = finished.isoformat()
    opt.training_end_time[fold] = finished_iso
    logger.info("training_end_time = " + finished_iso)

    # elapsed times accumulate over successive calls
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += wall_delta
    else:
        opt.training_elapsed_time = wall_delta
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))
    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += cpu_delta
    else:
        opt.training_elapsed_utime = cpu_delta
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # keep the optimizer's outputs for this fold
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_opt_outputs[fold] = rcmdr.opt_outputs_

    return rcmdr
# Example #4
# 0
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Train a recommender for a single fold, logging timings into ``opt``.

    Parameters
    ----------
    opt : namespace of parsed command line options
        supplies ``domain``, ``fold``, ``C``, ``k``, ``tol``, ``maxiter`` and
        ``rseed``; it is modified in place to hold start/end times, elapsed
        times and the optimizer's outputs
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; used as the index into the per-fold lists kept on ``opt``

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    def _ensure_per_fold(name, initial):
        # create ``opt.<name>`` as a list with ``opt.fold`` slots if missing
        if name not in opt:
            setattr(opt, name, [initial] * opt.fold)

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    domain_unspecified = np.all(opt.domain == [0, 0, 0])
    if domain_unspecified:
        # fall back to (min, max, smallest step between distinct scores)
        score_min = np.min(tsc)
        score_max = np.max(tsc)
        score_step = np.min(np.diff(np.unique(tsc)))
        score_domain = (score_min, score_max, score_step)
    else:
        score_domain = tuple(opt.domain)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev,
        tsc,
        score_domain=score_domain,
        event_feature=event_feature)

    # init learning results
    _ensure_per_fold('training_start_time', 0)
    _ensure_per_fold('training_end_time', 0)
    _ensure_per_fold('learning_i_loss', np.inf)
    _ensure_per_fold('learning_f_loss', np.inf)
    _ensure_per_fold('learning_opt_outputs', None)

    # set starting time (wall clock, then user CPU time)
    t_start = datetime.datetime.now()
    cpu_start = os.times()[0]
    stamp = t_start.isoformat()
    opt.training_start_time[fold] = stamp
    logger.info("training_start_time = " + stamp)

    # create and fit the model
    rcmdr = EventScorePredictor(
        C=opt.C,
        k=opt.k,
        tol=opt.tol,
        maxiter=opt.maxiter,
        random_state=opt.rseed)
    rcmdr.fit(data)

    # set end and elapsed time
    t_end = datetime.datetime.now()
    cpu_end = os.times()[0]
    wall_spent = t_end - t_start
    cpu_spent = cpu_end - cpu_start
    stamp = t_end.isoformat()
    opt.training_end_time[fold] = stamp
    logger.info("training_end_time = " + stamp)

    # totals are summed across calls; the first call creates them
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += wall_spent
    else:
        opt.training_elapsed_time = wall_spent
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))

    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += cpu_spent
    else:
        opt.training_elapsed_utime = cpu_spent
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_opt_outputs[fold] = rcmdr.opt_outputs_

    return rcmdr