def train_embed(data_dir, params, model_name):
    # load hyperparameters
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']
    if model_name == 'SparseTransE':
        alpha = params['alpha']

    # load data
    dataset = AmazonDataset(data_dir, model_name='TransE')
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)

    if model_name == 'TransE':
        model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
    elif model_name == 'SparseTransE':
        model = SparseTransE(int(embedding_dim), relation_size, entity_size,
                             alpha=alpha).to(device)

    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir,
                             model_name=model_name)
    iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay,
                           warmup=warmup, lr_decay_rate=lr_decay_rate,
                           lr_decay_every=lr_decay_every, eval_every=1e+5,
                           early_stop=True)

    return model
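# Hedged usage sketch for train_embed(): the data directory and output path are
# placeholders, and load_params() is assumed to be the same helper used by the
# test scripts further down.
if __name__ == '__main__':
    params = load_params()  # tuned hyperparameters (embedding_dim, lr, ...)
    model = train_embed('../data_luxury_5core/test/', params, 'TransE')
    # persist the trained embeddings for downstream ranking (path is illustrative)
    torch.save(model.state_dict(), 'transe_embed.pt')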
def __init__(self, embedding_dim, relation_size, entity_size, data_dir,
             alpha, mu, kappa, gamma=1):
    super(PPR_TransE, self).__init__(embedding_dim, relation_size, entity_size, gamma)

    # dataloader
    self.dataset = AmazonDataset(data_dir)
    self.item_idx = torch.tensor([self.dataset.entity_list.index(i)
                                  for i in self.dataset.item_list],
                                 dtype=torch.long, device=device)
    self.user_idx = torch.tensor([self.dataset.entity_list.index(u)
                                  for u in self.dataset.user_list],
                                 dtype=torch.long, device=device)
    self.brand_idx = torch.tensor([self.dataset.entity_list.index(b)
                                   for b in self.dataset.brand_list],
                                  dtype=torch.long, device=device)

    # load network
    edges = [[r[0], r[1]] for r in self.dataset.triplet_df.values]
    # add both directions for user-item edges (user->item and item->user)
    for r in self.dataset.triplet_df.values:
        if r[2] == 0:
            edges.append([r[1], r[0]])

    self.G = nx.DiGraph()
    self.G.add_nodes_from([i for i in range(len(self.dataset.entity_list))])
    self.G.add_edges_from(edges)
    self.H = nx.to_scipy_sparse_matrix(self.G)

    # coefficients for mk_sim_mat
    self.kappa = kappa
    # the balance between embedding loss and PageRank loss (lambda_)
    # is passed in at training time
    # balance between the adjacency matrix and the similarity matrix
    self.alpha = alpha
    # strength of the bias (restart probability) in personalized PageRank
    self.mu = mu
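# Hedged sketch of how the stored adjacency matrix H and restart strength mu
# could drive a personalized PageRank power iteration. The row normalization
# and the personal_vec start vector are assumptions for illustration, not this
# repository's actual mk_sim_mat/PageRank code.
import numpy as np
import scipy.sparse


def personalized_pagerank_sketch(H, personal_vec, mu, iters=50):
    # build a transition matrix by row-normalizing the adjacency matrix
    out_deg = np.asarray(H.sum(axis=1)).flatten()
    out_deg[out_deg == 0] = 1  # guard against dangling nodes
    M = (scipy.sparse.diags(1.0 / out_deg) @ H).T.tocsr()

    pr = np.full(H.shape[0], 1.0 / H.shape[0])
    for _ in range(iters):
        # with prob. (1 - mu) follow an edge, with prob. mu restart at personal_vec
        pr = (1 - mu) * (M @ pr) + mu * personal_vec
    return pr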
def objective(trial):
    start = time.time()

    # PageRank parameters
    mu = trial.suggest_uniform('mu', 0, 1)
    # logged under the name 'beta' to avoid clashing with SparseTransE's 'alpha'
    alpha = trial.suggest_uniform('beta', 0, 0.5)
    kappa1 = trial.suggest_uniform('kappa1', 0, 1)
    kappa2 = trial.suggest_uniform('kappa2', 0, 1)
    kappa3 = trial.suggest_uniform('kappa3', 0, 1)
    kappa = [kappa1, kappa2, kappa3]

    # model parameters
    embedding_dim = int(trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16))

    # training parameters
    lambda_ = trial.suggest_uniform('lambda_', 0, 1)
    batch_size = trial.suggest_int('batch_size', 256, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 10, 100)
    lr_decay_every = 2
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for i in range(len(data_dir)):
        dataset = AmazonDataset(data_dir[i], model_name='TransE')
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)
        ppr_transe = PPR_TransE(embedding_dim, relation_size, entity_size,
                                data_dir[i], alpha, mu, kappa).to(device)

        # model_name is assumed to be defined at module level
        iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir[i],
                                 model_name=model_name)
        iterater.iterate_epoch(ppr_transe, lr=lr, epoch=2000,
                               weight_decay=weight_decay, lambda_=lambda_,
                               warmup=warmup, lr_decay_rate=lr_decay_rate,
                               lr_decay_every=lr_decay_every, eval_every=1e+5)

        # inference
        inf = Inference(data_dir[i])
        score = inf.get_score(ppr_transe, kappa, mu, alpha)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    # Optuna minimizes, so return the negated mean validation score
    return -1 * score_sum / 2
def objective(trial):
    start = time.time()

    import gc
    gc.collect()

    # data_path and model_name are assumed to be defined at module level
    data_dir = [data_path + '/valid1', data_path + '/valid2']
    score_sum = 0

    embed_model = {'TransE': TransE, 'SparseTransE': SparseTransE}

    # hyperparameters
    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16)
    if model_name == 'SparseTransE':
        alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-2)  # only for SparseTransE
    batch_size = trial.suggest_int('batch_size', 128, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    for dir_path in data_dir:
        # load data
        dataset = AmazonDataset(dir_path, model_name=model_name)
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)

        # pass alpha through to SparseTransE (the original suggested alpha
        # but never used it, which looks like a bug)
        if model_name == 'SparseTransE':
            model = embed_model[model_name](int(embedding_dim), relation_size,
                                            entity_size, alpha=alpha).to(device)
        else:
            model = embed_model[model_name](int(embedding_dim), relation_size,
                                            entity_size).to(device)

        iterater = TrainIterater(batch_size=int(batch_size), data_dir=dir_path,
                                 model_name=model_name)
        score = iterater.iterate_epoch(model, lr=lr, epoch=3000,
                                       weight_decay=weight_decay, warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5, early_stop=True)
        score_sum += score

        torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    # Optuna minimizes, so return the negated mean validation score
    return -1 * score_sum / 2
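# Hedged sketch of driving the objective above with Optuna; the trial count is
# illustrative, not taken from this repository.
import optuna

if __name__ == '__main__':
    study = optuna.create_study()
    study.optimize(objective, n_trials=100)
    print(study.best_params)  # best hyperparameters found so far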
def __init__(self, data_dir, model_name, patience):
    self.dataset = AmazonDataset(data_dir, model_name)
    self.patience = patience
    self.model_name = model_name
    self.user_item_nega_df = self.negative_sampling()

    # positive test pairs are labeled 1, negative-sampled pairs 0
    y_test = [1 for i in range(len(self.dataset.user_item_test_df))] \
             + [0 for i in range(len(self.user_item_nega_df))]
    self.y_test = np.array(y_test)

    self.loss_list = []
    self.model_list = []
def __init__(self, data_dir):
    # passing an already-constructed AmazonDataset would be faster,
    # but we build one here for simplicity
    self.evaluater = Evaluater(data_dir)
    self.dataset = AmazonDataset(data_dir, model_name='TransE')

    edges = [[r[0], r[1]] for r in self.dataset.triplet_df.values]
    # add both directions for user-item edges (user->item and item->user)
    for r in self.dataset.triplet_df.values:
        if r[2] == 0:
            edges.append([r[1], r[0]])

    # load network
    self.G = nx.DiGraph()
    self.G.add_nodes_from([i for i in range(len(self.dataset.entity_list))])
    self.G.add_edges_from(edges)
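# Hedged sketch: with the graph built above, a per-user personalized PageRank
# can be computed directly via networkx. The single-node restart vector and
# the alpha value are illustrative, not this class's actual scoring code.
def user_pagerank_sketch(G, user_node, alpha=0.85):
    personalization = {n: 0.0 for n in G.nodes()}
    personalization[user_node] = 1.0  # restart only at the query user
    return nx.pagerank(G, alpha=alpha, personalization=personalization)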
def objective(trial):
    start = time.time()

    # hyperparameters
    alpha = trial.suggest_uniform('alpha', 0, 1)
    beta = trial.suggest_uniform('beta', 0, 0.5)

    data_dirs = ['../' + data_path + '/valid1/', '../' + data_path + '/valid2/']
    score_sum = 0
    for data_dir in data_dirs:
        # load data
        dataset = AmazonDataset(data_dir)

        # load the precomputed similarity matrix
        sim_mat = load_sim_mat('sim_mat' + data_dir[-2] + '.csr',
                               len(dataset.user_list), len(dataset.item_list))

        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add both directions for user-item edges (user->item and item->user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        # load network
        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        evaluater = Evaluater(data_dir)
        ranking_mat = get_ranking_mat(G, sim_mat, alpha, beta, dataset)
        score = evaluater.topn_precision(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}s'.format(mi, sec))

    # Optuna minimizes, so return the negated mean validation score
    return -1 * score_sum / 2
def objective(trial):
    start = time.time()

    # hyperparameters
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    beta = trial.suggest_uniform('beta', 0, 0.5)
    gamma1 = trial.suggest_uniform('gamma1', 0, 1)
    gamma2 = trial.suggest_uniform('gamma2', 0, 1)
    gamma3 = trial.suggest_uniform('gamma3', 0, 1)
    gamma = [gamma1, gamma2, gamma3]

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for i in range(len(data_dir)):
        # load data
        dataset = AmazonDataset(data_dir[i], model_name='TransE')

        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add both directions for user-item edges (user->item and item->user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        # load network
        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        # model is assumed to be a module-level list of trained embedding
        # models, one per validation split
        ranking_mat = get_ranking_mat(G, dataset, model[i], gamma, alpha, beta)
        evaluater = Evaluater(data_dir[i])
        score = evaluater.topn_map(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
def objective(trial):
    start = time.time()

    # hyperparameters
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    beta = trial.suggest_uniform('beta', 0, 0.5)
    gamma1 = trial.suggest_uniform('gamma1', 0, 1)
    gamma2 = trial.suggest_uniform('gamma2', 0, 1)
    gamma3 = trial.suggest_uniform('gamma3', 0, 1)
    gamma = [gamma1, gamma2, gamma3]

    data_dir = ['../' + data_path + '/valid1', '../' + data_path + '/valid2']
    score_sum = 0
    for i in range(len(data_dir)):
        # load data
        dataset = AmazonDataset(data_dir[i], model_name='SparseTransE')

        # load network
        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add both directions for user-item edges (user->item and item->user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        # model is assumed to be a module-level list of trained embedding
        # models, one per validation split
        ranking_mat = get_ranking_mat(G, dataset, model[i], gamma, alpha, beta)
        evaluater = Evaluater(data_dir[i])
        score = evaluater.topn_map(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
def objective(trial):
    start = time.time()

    import gc
    gc.collect()

    dataset = AmazonDataset('./data')

    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 64, 16)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)

    batch_size = trial.suggest_discrete_uniform('batch_size', 64, 256, 64)
    iterater = TrainIterater(batch_size=int(batch_size))

    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 100, 500)
    lr_decay_every = trial.suggest_int('lr_decay_every', 1, 5)
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    score = iterater.iterate_epoch(bpr, lr=lr, epoch=3000,
                                   weight_decay=weight_decay, warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5)

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score
class TrainIterater():
    def __init__(self, batch_size, data_dir, model_name='DistMulti'):
        self.data_dir = data_dir
        self.dataset = AmazonDataset(self.data_dir, model_name=model_name)
        self.batch_size = batch_size
        self.model_name = model_name

    def train(self, batch, loss_func, optimizer, model):
        optimizer.zero_grad()

        if self.model_name == 'DistMulti' or self.model_name == 'Complex':
            triplet, y_train = batch
            h_entity_tensor = torch.tensor(triplet[:, 0], dtype=torch.long, device=device)
            t_entity_tensor = torch.tensor(triplet[:, 1], dtype=torch.long, device=device)
            relation_tensor = torch.tensor(triplet[:, 2], dtype=torch.long, device=device)
            y_train = torch.tensor(y_train, dtype=torch.float, device=device)

            pred = model(h_entity_tensor, t_entity_tensor, relation_tensor)
            loss = loss_func(pred, y_train)

        elif self.model_name == 'TransE':
            posi_batch, nega_batch = batch
            h = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            t = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            r = torch.tensor(posi_batch[:, 2], dtype=torch.long, device=device)

            n_h = torch.tensor(nega_batch[:, 0], dtype=torch.long, device=device)
            n_t = torch.tensor(nega_batch[:, 1], dtype=torch.long, device=device)
            n_r = torch.tensor(nega_batch[:, 2], dtype=torch.long, device=device)

            pred = model(h, t, r, n_h, n_t, n_r)
            loss = torch.sum(pred)

        elif self.model_name == 'SparseTransE':
            posi_batch, nega_batch, batch_user, batch_item, batch_brand = batch
            h = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            t = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            r = torch.tensor(posi_batch[:, 2], dtype=torch.long, device=device)

            n_h = torch.tensor(nega_batch[:, 0], dtype=torch.long, device=device)
            n_t = torch.tensor(nega_batch[:, 1], dtype=torch.long, device=device)
            n_r = torch.tensor(nega_batch[:, 2], dtype=torch.long, device=device)

            reg_user = torch.tensor(batch_user, dtype=torch.long, device=device)
            reg_item = torch.tensor(batch_item, dtype=torch.long, device=device)
            reg_brand = torch.tensor(batch_brand, dtype=torch.long, device=device)

            pred = model(h, t, r, n_h, n_t, n_r, reg_user, reg_item, reg_brand)
            loss = torch.sum(pred)

        elif self.model_name == 'RegComplex':
            triplet, y_train, batch_user, batch_item, batch_brand = batch
            h_entity_tensor = torch.tensor(triplet[:, 0], dtype=torch.long, device=device)
            t_entity_tensor = torch.tensor(triplet[:, 1], dtype=torch.long, device=device)
            relation_tensor = torch.tensor(triplet[:, 2], dtype=torch.long, device=device)
            y_train = torch.tensor(y_train, dtype=torch.float, device=device)

            reg_user = torch.tensor(batch_user, dtype=torch.long, device=device)
            reg_item = torch.tensor(batch_item, dtype=torch.long, device=device)
            reg_brand = torch.tensor(batch_brand, dtype=torch.long, device=device)

            pred, reg = model(h_entity_tensor, t_entity_tensor, relation_tensor,
                              reg_user, reg_item, reg_brand)
            loss = loss_func(pred, y_train) + reg

        loss.backward()
        optimizer.step()

        return loss

    def iterate_train(self, model, lr=0.001, weight_decay=0, print_every=2000,
                      plot_every=50):
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        loss_func = nn.BCELoss()

        print_loss_total = 0
        plot_loss_list = []
        plot_loss_total = 0

        if self.model_name in ('DistMulti', 'Complex', 'RegComplex'):
            train_num = len(self.dataset.triplet_df) + len(self.dataset.nega_triplet_df)
        elif self.model_name in ('TransE', 'SparseTransE'):
            train_num = len(self.dataset.triplet_df)

        start_time = time.time()
        for i in range(int(train_num / self.batch_size) + 1):
            batch = self.dataset.get_batch(batch_size=self.batch_size)
            loss = self.train(batch, loss_func, optimizer, model)
            print_loss_total += loss.detach()
            plot_loss_total += loss.detach()

            # every print_every steps, report the current average loss,
            # elapsed time, and progress (%) through the dataset
            if (i + 1) % print_every == 0:
                runtime = time.time() - start_time
                mi, sec = self.time_since(runtime)
                avg_loss = print_loss_total / print_every
                data_percent = int(i * self.batch_size / train_num * 100)
                print('train loss: {:e} processed: {}({}%) {}m{}sec'.format(
                    avg_loss, i * self.batch_size, data_percent, mi, sec))
                print_loss_total = 0

            # every plot_every steps, record the average loss for plotting
            if (i + 1) % plot_every == 0:
                avg_loss = plot_loss_total / plot_every
                plot_loss_list.append(avg_loss)
                plot_loss_total = 0

        return plot_loss_list

    def time_since(self, runtime):
        mi = int(runtime / 60)
        sec = int(runtime - mi * 60)
        return (mi, sec)

    def iterate_epoch(self, model, lr, epoch, weight_decay=0, warmup=0,
                      lr_decay_rate=1, lr_decay_every=10, eval_every=5,
                      early_stop=False):
        eval_model = Evaluater(self.data_dir, model_name=self.model_name)
        # NOTE: the early-stopping checkpoint directory is hardcoded here
        es = EarlyStop('../data_beauty_2core_es/early_stopping/',
                       self.model_name, patience=6)
        plot_loss_list = []
        plot_score_list = []

        for i in range(epoch):
            plot_loss_list.extend(
                self.iterate_train(model, lr=lr, weight_decay=weight_decay,
                                   print_every=10000))

            # early stopping
            if early_stop:
                pre_model = es.early_stop(model)
                if pre_model:
                    print('Early stop epoch: {}'.format(i + 1))
                    return eval_model.topn_map(pre_model)

            # learning-rate scheduling (decay starts after the warmup period)
            if i > warmup:
                if (i - warmup) % lr_decay_every == 0:
                    lr = lr * lr_decay_rate

            if (i + 1) % eval_every == 0:
                score = eval_model.topn_map(model)
                print('epoch: {} map: {}'.format(i, score))
                plot_score_list.append(score)

        return eval_model.topn_map(model)

    def _plot(self, loss_list):
        # TODO: make this plotting more thorough
        plt.plot(loss_list)
        plt.show()
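# Hedged usage sketch for TrainIterater; the data directory and hyperparameter
# values here are placeholders, not tuned settings from this repository.
if __name__ == '__main__':
    from models import TransE
    dataset = AmazonDataset('../data_beauty_2core_es/valid1/', model_name='TransE')
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    model = TransE(32, relation_size, entity_size).to(device)

    iterater = TrainIterater(batch_size=256,
                             data_dir='../data_beauty_2core_es/valid1/',
                             model_name='TransE')
    score = iterater.iterate_epoch(model, lr=1e-3, epoch=3000, warmup=350,
                                   lr_decay_rate=0.9, lr_decay_every=2,
                                   eval_every=1e+5, early_stop=True)
    print('map: {}'.format(score))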
# Data augmentation is currently disabled for training; only ToTensor is
# applied (random flips/rotations/translations from utils can be re-enabled).
train_transforms = transforms.Compose([transforms.ToTensor()])

print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-tif-v2'

# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train_v2.csv', data_path, 'csv/labels.txt',
                              train_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True, num_workers=4)

# check the size of your dataset
dataset_sizes = {}
dataset_sizes['train'] = len(train_dataset)
print('Training dataset size:', dataset_sizes['train'])

# -------------------------- MODEL --------------------------
# URLs to the pretrained weights
RESNET_18 = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
RESNET_101 = 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'pin_memory': True} if args.cuda else {}

# -------------------------- LOADING THE DATA --------------------------
# Rescale to the model's input size, then convert to tensor
print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-jpg'

# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train_v2.csv', data_path, 'csv/labels.txt',
                              transform=transforms.Compose([
                                  Rescale((args.input_size, args.input_size)),
                                  transforms.ToTensor()
                              ]))
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True, num_workers=4)

# check the size of your dataset
dataset_sizes = {}
dataset_sizes['train'] = len(train_dataset)
print('Training dataset size:', dataset_sizes['train'])

# -------------------------- MODEL --------------------------
# URLs to the pretrained weights
from models import DistMulti, TransE
from training import TrainIterater
from evaluate import Evaluater
import optuna
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import warnings
warnings.filterwarnings('ignore')

# load data
model_name = 'TransE'
dataset = AmazonDataset('./data', model_name='TransE')

edges = [[r[0], r[1]] for r in dataset.triplet_df.values]

# load network
G = nx.DiGraph()
G.add_nodes_from([i for i in range(len(dataset.entity_list))])
G.add_edges_from(edges)


def reconstruct_kg(model):
    with torch.no_grad():
        batch_size = int(len(dataset.item_list) / 2)
        item_index = [dataset.entity_list.index(item)
                      for item in dataset.item_list]
        user_index = [
if __name__ == '__main__':
    args = sys.argv
    model_name = args[1]
    params = load_params()
    print(params)

    import gc
    gc.collect()

    # load data
    data_dir = '../' + data_path + '/test/'
    dataset = AmazonDataset(data_dir, model_name=model_name)
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)

    embedding_dim = params['embedding_dim']
    alpha = params['alpha']
    model = SparseTransE(int(embedding_dim), relation_size, entity_size,
                         alpha=alpha).to(device)

    batch_size = params['batch_size']
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir,
                             model_name=model_name)

    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = 350
    lr_decay_every = 2
    return -1 * score


if __name__ == '__main__':
    params = load_param('./result_beauty')
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = params['warmup']
    lr_decay_every = params['lr_decay_every']
    lr_decay_rate = params['lr_decay_rate']

    data_dir = '../data_beauty_2core_es/test/bpr'
    dataset = AmazonDataset(data_dir)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)

    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir)
    score = iterater.iterate_epoch(bpr, lr=lr, epoch=3000,
                                   weight_decay=weight_decay, warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5, early_stop=True)

    # record the test result
    np.savetxt('./result_beauty/score.txt', np.array([score]))
# Define transformations
# ImageNet normalization is applied since the models are pretrained.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Create dataloaders
kwargs = {'pin_memory': True} if cuda else {}
testset = AmazonDataset('csv/sample_submission_v2.csv',
                        '/home/jlcastillo/Database_real/test_full',
                        'csv/labels.txt', args.nir_channel, test_transforms)
test_loader = DataLoader(testset, batch_size=args.batch_size, shuffle=False,
                         num_workers=args.nworkers, **kwargs)


def fscore(prediction):
    """Get the F-score of the validation set.
    Gives a good indication of the score on the public leaderboard."""
    target = torch.FloatTensor(0, 17)
    for i, (_, y) in enumerate(val_loader):
        target = torch.cat((target, y), 0)
    fscore = fbeta_score(target.numpy(),
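# Hedged sketch of collecting sigmoid outputs over a loader to feed fscore();
# `model` and the `cuda` flag mirror the surrounding script, but this loop
# itself is illustrative rather than the script's actual prediction code.
def predict_sketch(model, loader):
    model.eval()
    preds = []
    with torch.no_grad():
        for x, _ in loader:
            if cuda:
                x = x.cuda()
            preds.append(torch.sigmoid(model(x)).cpu())  # 17 label probabilities
    return torch.cat(preds, 0).numpy()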
# -------------------------- LOADING THE DATA --------------------------
# Data augmentation and normalization for training
# Just normalization for validation
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-jpg'

# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train.csv', data_path, 'csv/labels.txt',
                              args.nir_channel, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True, num_workers=4)

# Validation (the original passed train_dataset to this loader, which
# looks like a bug; val_dataset is used here instead)
val_dataset = AmazonDataset('csv/val.csv', data_path, 'csv/labels.txt',
                            args.nir_channel, transform=train_transforms)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
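# Hedged sketch of one training epoch over train_loader; BCEWithLogitsLoss is
# an assumption consistent with the 17-label multi-label tagging task, not
# necessarily the exact loss this script uses.
def train_epoch_sketch(model, loader, optimizer):
    model.train()
    criterion = torch.nn.BCEWithLogitsLoss()
    running_loss = 0.0
    for inputs, labels in loader:
        if args.cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(loader.dataset)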
def __init__(self, data_dir):
    self.dataset = AmazonDataset(data_dir=data_dir)
        reg_user = torch.tensor(batch_user, dtype=torch.long, device=device)
        reg_item = torch.tensor(batch_item, dtype=torch.long, device=device)
        reg_brand = torch.tensor(batch_brand, dtype=torch.long, device=device)

        pred = model(h, t, r, n_h, n_t, n_r, reg_user, reg_item, reg_brand)
        loss = torch.sum(pred)

        return loss

    def valid_metric(self, model):
        return 0


if __name__ == '__main__':
    import models
    dataset = AmazonDataset('../data_beauty_2core_es/valid1/', 'TransE')
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    model = models.TransE(10, relation_size, entity_size).to(device)
    es = EarlyStop('../data_beauty_2core_es/early_stopping/', 'TransE', 10)
    es.early_stop(model)
def __init__(self, data_dir, model_name='DistMulti'):
    self.dataset = AmazonDataset(data_dir, model_name=model_name)
    self.model_name = model_name
if amazon_data[0] == 'b':
    data_path = 'data_' + amazon_data + '_2core'
elif amazon_data[0] == 'l':
    data_path = 'data_' + amazon_data + '_5core'

model_name = args[2]
params = load_params()
print(params)

import gc
gc.collect()

# load data
data_dir = '../' + data_path + '/test/'
dataset = AmazonDataset(data_dir, model_name='SparseTransE')
relation_size = len(set(list(dataset.triplet_df['relation'].values)))
entity_size = len(dataset.entity_list)

embedding_dim = params['embedding_dim']
alpha = params['alpha']
model = SparseTransE(int(embedding_dim), relation_size, entity_size,
                     alpha=alpha).to(device)

batch_size = params['batch_size']
iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir,
                         model_name=model_name)
            loss_total += loss.detach()

        return loss_total / len(self.user_item_train_df)

    def valid_loss(self, batch, y_train, loss_func, model):
        with torch.no_grad():
            posi_batch, nega_batch = batch
            user_tensor = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            item_tensor = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            nega_item_tensor = torch.tensor(nega_batch[:, 1], dtype=torch.long, device=device)

            pred = model(user_tensor, item_tensor, nega_item_tensor)
            loss = loss_func(pred, y_train)

        return loss

    def valid_metric(self, model):
        return 0


if __name__ == '__main__':
    import bpr_model
    dataset = AmazonDataset('../data_beauty_2core_es/valid1/bpr/')
    user_size = len(dataset.user_list)
    item_size = len(dataset.item_list)
    model = bpr_model.BPR(32, user_size, item_size)
    es = EarlyStop('../data_beauty_2core_es/early_stopping/bpr/', 10)
    es.early_stop(model)