def join(self):
    """Convert raw (user_id, item_id, click) records into a slot file.

    For every record the feature extractor is primed with the user/item
    pair, each of its processes is called to obtain a ``(slot, values)``
    pair, and the pairs are written as one instance labelled with
    ``click``.

    Command-line options (read through ``pydev.AutoArg``):
        testnum: forwarded to ``utils.readfile`` as ``test_num``
            (default -1).
        f: path of the input record file.
        m: movie directory handed to ``MovieLensRankingFeatureExtractor``.
        s: slot output path, second argument of ``extractor.save``.
        o: path of the slot file produced by ``sf.SlotFileWriter``.
        c: coder output path, first argument of ``extractor.save``.
    """
    arg = pydev.AutoArg()
    test_num = int(arg.option('testnum', -1))
    input_filename = arg.option('f')
    movie_dir = arg.option('m')
    slot_output_filename = arg.option('s')
    output_filename = arg.option('o')
    coder_output_filename = arg.option('c')

    # Keep the input open for the whole pass: utils.readfile may hand back
    # records lazily, and the handle is now closed even if extraction fails.
    with open(input_filename) as input_file:
        data = utils.readfile(input_file, test_num=test_num)
        extractor = MovieLensRankingFeatureExtractor(movie_dir)
        writer = sf.SlotFileWriter(output_filename)

        for user_id, item_id, click in tqdm.tqdm(data):
            writer.begin_instance(click)
            extractor.begin(user_id, item_id)
            # Each process is a callable returning one (slot, values) pair.
            for process in extractor.processes():
                slot, values = process()
                writer.write_slot(slot, values)
            writer.end_instance()

    extractor.save(coder_output_filename, slot_output_filename)
    writer.summary()
def lr(self):
    """Restore a saved ``train_lr.LRRank`` model and run the uid/iid test.

    Command-line options (read through ``pydev.AutoArg``):
        model: path of the saved state dict (default 'temp/lr.pkl').

    The restored model is moved to ``self.device`` and handed to
    ``self.test_uid_iid_model``.
    """
    import train_lr

    # NOTE(review): presumably user-id space, item-id space and embedding
    # size used at training time -- confirm against train_lr.
    model = train_lr.LRRank(138494, 131263, 8)

    auto_arg = pydev.AutoArg()
    model_path = auto_arg.option('model', 'temp/lr.pkl')

    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be restored when evaluating on self.device.
    state_dict = torch.load(model_path, map_location=self.device)
    model.load_state_dict(state_dict)
    model.to(self.device)

    self.test_uid_iid_model(model)
def dnn(self):
    """Restore a saved ``train_dnn.DNNRank`` model and run the uid/iid test.

    Command-line options (read through ``pydev.AutoArg``):
        model: path of the saved state dict (default 'temp/dnn.pkl').

    The restored model is moved to ``self.device`` and handed to
    ``self.test_uid_iid_model``.
    """
    import train_dnn

    # NOTE(review): presumably user-id space, item-id space and embedding
    # size used at training time -- confirm against train_dnn.
    model = train_dnn.DNNRank(138494, 131263, 16)

    auto_arg = pydev.AutoArg()
    model_path = auto_arg.option('model', 'temp/dnn.pkl')

    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be restored when evaluating on self.device.
    state_dict = torch.load(model_path, map_location=self.device)
    model.load_state_dict(state_dict)
    model.to(self.device)

    self.test_uid_iid_model(model)
def test_ins_data(self, model, slot_info):
    """Score a slot-file test set with ``model`` and report the AUC.

    Batches are read from the test file while ``reader.epoch() < 1``.
    For each batch, every slot's ids are flattened into one id list plus
    a list of offsets marking where each instance's ids start; the model
    receives one ``(ids, offsets)`` tensor pair per entry of
    ``slot_info``, in ``slot_info`` order.

    Command-line options (read through ``pydev.AutoArg``):
        test: path of the slot file to evaluate.
        batch: number of instances requested per read (default 20000).

    Args:
        model: module whose ``forward`` accepts the list of
            ``(ids, offsets)`` tensor pairs and returns the predictions.
        slot_info: sequence of 2-item entries; only the first item (the
            slot key) is used here.
    """
    autoarg = pydev.AutoArg()
    input_filename = autoarg.option('test')
    batch_size = int(autoarg.option('batch', 20000))

    reader = easy.slot_file.SlotFileReader(input_filename)
    y = []   # labels, accumulated over all batches
    y_ = []  # model outputs, accumulated over all batches
    reading_count = 0

    # Evaluation only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        while reader.epoch() < 1:
            labels, slots = reader.next(batch_size)

            # make pytorch data.
            clicks = torch.Tensor(labels).to(self.device)

            # slot -> (flat id list, per-instance start offsets)
            # NOTE(review): an instance that lacks a slot adds no offset
            # for it, so that slot can end up with fewer offsets than the
            # batch has instances -- confirm inputs carry every slot.
            per_slot = {}
            for instance in slots:
                for slot, ids in instance:
                    if slot not in per_slot:
                        per_slot[slot] = ([], [])
                    flat_ids, offsets = per_slot[slot]
                    offsets.append(len(flat_ids))
                    flat_ids.extend(ids)

            # Slots absent from the whole batch yield empty tensors.
            x = []
            for slot, _ in slot_info:
                flat_ids, offsets = per_slot.get(slot, ([], []))
                x.append((torch.tensor(flat_ids).to(self.device),
                          torch.tensor(offsets).to(self.device)))

            clicks_ = model.forward(x)

            y += clicks.view(-1).tolist()
            y_ += clicks_.view(-1).tolist()

            pydev.log13('reading_count : %d' % reading_count)
            reading_count += 1

    auc = metrics.roc_auc_score(y, y_)
    # Parenthesised so the statement parses on both Python 2 and 3; it
    # still prints whatever pydev.log returns, as before.
    print(pydev.log('Valid AUC: %.3f' % auc))
def slot_dnn(self):
    """Restore a saved ``SlotDnnRank`` model and evaluate it on slot data.

    The slot description file is read into a list of
    ``(slot, slot_feanum)`` pairs, which is used both to build the model
    and to drive ``self.test_ins_data``.

    Command-line options (read through ``pydev.AutoArg``):
        emb: embedding size passed to ``SlotDnnRank`` (default 32).
        s: path of the slot description file.
        m: path of the saved state dict.
    """
    import train_slot_dnn

    autoarg = pydev.AutoArg()
    embedding_size = int(autoarg.option('emb', 32))
    slotinfo_filename = autoarg.option('s')
    model_path = autoarg.option('m')

    # temp get slot_info.
    # The context manager closes the handle once all rows are consumed.
    with open(slotinfo_filename) as slotinfo_file:
        slot_info = [
            (slot, slot_feanum)
            for slot, slot_feanum in pydev.foreach_row(slotinfo_file,
                                                       format='si')
        ]

    model = train_slot_dnn.SlotDnnRank(slot_info, embedding_size)
    model = model.to(self.device)
    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be restored when evaluating on self.device.
    model.load_state_dict(torch.load(model_path, map_location=self.device))

    self.test_ins_data(model, slot_info)
for uid, iid, click in self.train: user_ids.append(uid) item_ids.append(iid) clicks.append(float(click)) if len(clicks)>=self.batch_size: yield (torch.tensor(user_ids).to(self.device), torch.tensor(item_ids).to(self.device), torch.tensor(clicks).to(self.device)) user_ids = [] item_ids = [] clicks = [] if __name__=='__main__': autoarg = pydev.AutoArg() data_dir = autoarg.option('data', 'data/') model_save_path = autoarg.option('output', 'temp/dnn.pkl') TestNum = int(autoarg.option('testnum', -1)) EmbeddingSize = int(autoarg.option('embed', 16)) EpochCount = int(autoarg.option('epoch', 3)) BatchSize = int(autoarg.option('batch', 1024)) device_name = autoarg.option('device', 'cuda') pydev.info('EmbeddingSize=%d' % EmbeddingSize) pydev.info('Epoch=%d' % EpochCount) pydev.info('BatchSize=%d' % BatchSize) device = torch.device(device_name)