def load_test_data(score_domain=(1.0, 5.0, 0.5)):
    """
    Load the bundled ``pci.event`` sample file as scored event data.

    Parameters
    ----------
    score_domain : tuple, default=(1.0, 5.0, 0.5)
        forwarded unchanged as the ``score_domain`` argument when the
        events are registered in the data container

    Returns
    -------
    data : EventWithScoreData
        container created with two object types and filled with the
        events and scores read from the sample file
    x : structured array
        raw records parsed by ``np.genfromtxt``; field ``event`` holds two
        strings per record and field ``score`` holds a float
    """
    from kamrecsys.data import EventWithScoreData
    from kamrecsys.datasets import SAMPLE_PATH

    # each tab-separated record: two string columns (the event) and a score
    record_type = np.dtype([('event', 'U18', 2), ('score', float)])
    sample_file = os.path.join(SAMPLE_PATH, 'pci.event')
    records = np.genfromtxt(
        fname=sample_file, delimiter='\t', dtype=record_type)

    container = EventWithScoreData(
        n_otypes=2, event_otypes=np.array([0, 1]))
    events, scores = records['event'], records['score']
    container.set_event(events, scores, score_domain=score_domain)

    return container, records
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Train a score predictor on one fold and record timing and loss results.

    Parameters
    ----------
    opt : namespace-like object
        parsed command line options; read through attribute access and
        membership tests, and updated in place with the training results
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; index of the slot in the per-fold result lists of `opt`

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender

    Notes
    -----
    When `opt.domain` equals ``[0, 0, 0]`` the score domain is derived from
    `tsc` as (minimum, maximum, smallest gap between distinct scores);
    otherwise `opt.domain` is used as given. Elapsed wall-clock and user CPU
    times are accumulated in `opt` across calls.
    """

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if np.all(opt.domain == [0, 0, 0]):
        # an all-zero domain option requests a domain inferred from scores
        lowest = np.min(tsc)
        highest = np.max(tsc)
        step = np.min(np.diff(np.unique(tsc)))
        score_domain = (lowest, highest, step)
    else:
        score_domain = tuple(opt.domain)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # init learning results: one slot per fold, created on first use only
    per_fold_defaults = (
        ('training_start_time', 0),
        ('training_end_time', 0),
        ('learning_i_loss', np.inf),
        ('learning_f_loss', np.inf),
        ('learning_opt_outputs', None),
    )
    for result_name, filler in per_fold_defaults:
        if result_name not in opt:
            setattr(opt, result_name, [filler] * opt.fold)

    # set starting time (wall clock and user CPU time)
    started_at = datetime.datetime.now()
    cpu_before = os.times()[0]
    started_iso = started_at.isoformat()
    opt.training_start_time[fold] = started_iso
    logger.info("training_start_time = " + started_iso)

    # create and learning model
    rcmdr = EventScorePredictor(
        C=opt.C, k=opt.k, tol=opt.tol, maxiter=opt.maxiter,
        random_state=opt.rseed)
    rcmdr.fit(data)

    # set end and elapsed time
    finished_at = datetime.datetime.now()
    cpu_after = os.times()[0]
    wall_spent = finished_at - started_at
    cpu_spent = cpu_after - cpu_before
    finished_iso = finished_at.isoformat()
    opt.training_end_time[fold] = finished_iso
    logger.info("training_end_time = " + finished_iso)

    # totals accumulate over successive calls
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += wall_spent
    else:
        opt.training_elapsed_time = wall_spent
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))

    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += cpu_spent
    else:
        opt.training_elapsed_utime = cpu_spent
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_opt_outputs[fold] = rcmdr.opt_outputs_

    return rcmdr
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Fit the recommender for a single fold, logging and storing its results.

    Parameters
    ----------
    opt : namespace-like object
        parsed command line options, accessed by attribute; per-fold
        results and accumulated elapsed times are written back into it
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; selects which entry of the per-fold lists is written

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if not np.all(opt.domain == [0, 0, 0]):
        score_domain = tuple(opt.domain)
    else:
        # domain left at all zeros: take the range and the smallest
        # spacing between distinct values from the observed scores
        score_min = np.min(tsc)
        score_max = np.max(tsc)
        score_step = np.min(np.diff(np.unique(tsc)))
        score_domain = (score_min, score_max, score_step)
    domain_message = "score_domain = " + str(score_domain)
    logger.info(domain_message)
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # init learning results; lists hold `opt.fold` entries each
    for slot, initial in (
            ('training_start_time', 0),
            ('training_end_time', 0),
            ('learning_i_loss', np.inf),
            ('learning_f_loss', np.inf),
            ('learning_opt_outputs', None)):
        if slot in opt:
            continue
        setattr(opt, slot, [initial] * opt.fold)

    # set starting time
    wall_start = datetime.datetime.now()
    user_start = os.times()[0]
    opt.training_start_time[fold] = wall_start.isoformat()
    logger.info("training_start_time = " + wall_start.isoformat())

    # create and learning model
    predictor_options = {
        'C': opt.C,
        'k': opt.k,
        'tol': opt.tol,
        'maxiter': opt.maxiter,
        'random_state': opt.rseed,
    }
    rcmdr = EventScorePredictor(**predictor_options)
    rcmdr.fit(data)

    # set end and elapsed time
    wall_end = datetime.datetime.now()
    user_end = os.times()[0]
    wall_delta = wall_end - wall_start
    user_delta = user_end - user_start
    opt.training_end_time[fold] = wall_end.isoformat()
    logger.info("training_end_time = " + wall_end.isoformat())

    # the first call stores the elapsed time, later calls add to it
    if 'training_elapsed_time' not in opt:
        opt.training_elapsed_time = wall_delta
    else:
        opt.training_elapsed_time += wall_delta
    wall_message = "training_elapsed_time = " + str(opt.training_elapsed_time)
    logger.info(wall_message)

    if 'training_elapsed_utime' not in opt:
        opt.training_elapsed_utime = user_delta
    else:
        opt.training_elapsed_utime += user_delta
    user_message = (
        "training_elapsed_utime = " + str(opt.training_elapsed_utime))
    logger.info(user_message)

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_opt_outputs[fold] = rcmdr.opt_outputs_

    return rcmdr