def __init__(self, config):
    self.params = config
    self.device = device  # module-level torch.device defined elsewhere
    self.word2index, self.index2word, self.embeddings = pickle.load(open(config.data_pickle, 'rb'))
    train_dataset = Loader(config, config.p_train_data)
    val_dataset = Loader(config, config.p_val_data)
    test_dataset = Loader(config, config.p_test_data)
    self.model = Model(config, self.embeddings).to(self.device)
    self.train_loader = DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True)
    self.val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, shuffle=False)
    self.test_loader = DataLoader(dataset=test_dataset, batch_size=config.batch_size, shuffle=False)
    params = filter(lambda param: param.requires_grad, self.model.parameters())
    self.optimizer = torch.optim.Adam(lr=config.learning_rate, betas=(config.beta1, config.beta2),
                                      eps=1e-7, weight_decay=3e-7, params=params)
    # lr = config.learning_rate
    # base_lr = 1.0
    # params = filter(lambda param: param.requires_grad, self.model.parameters())
    # optimizer = torch.optim.Adam(lr=base_lr, betas=(config.beta1, config.beta2),
    #                              eps=1e-7, weight_decay=3e-7, params=params)
    # cr = lr / math.log2(config.lr_warm_up_num)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
    #     lr_lambda=lambda ee: cr * math.log2(ee + 1) if ee < config.lr_warm_up_num else lr)
    self.model_path = os.path.join(self.params.cache_dir)
    if not os.path.exists(self.model_path):
        print('create path: ', self.model_path)
        os.makedirs(self.model_path)
    self.best_model = None
    self.lr_epoch = 0
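# A minimal runnable sketch of the commented-out warm-up schedule above,
# assuming config exposes learning_rate, lr_warm_up_num, beta1 and beta2 as in
# the snippet. With base lr=1.0, LambdaLR's multiplier IS the absolute learning
# rate, so the lambda ramps it logarithmically for the first lr_warm_up_num
# steps and then holds it flat. make_warmup_optimizer is a hypothetical helper
# name, not the author's code.
import math
import torch

def make_warmup_optimizer(model, config):
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(lr=1.0, betas=(config.beta1, config.beta2),
                                 eps=1e-7, weight_decay=3e-7, params=params)
    lr = config.learning_rate
    cr = lr / math.log2(config.lr_warm_up_num)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log2(ee + 1) if ee < config.lr_warm_up_num else lr)
    return optimizer, scheduler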
def main(_):
    if config.mode == "train":
        trainer = Trainer(config)
        trainer.train()
    elif config.mode == "preprocess":
        dataloader.preprocess(_)
    elif config.mode == "debug":
        trainer = Trainer(config)
        train_dataset = Loader(config, config.p_train_data)
        train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=False)
        data_iter = iter(train_loader)
        frame_vecs, frame_n, ques, ques_n, start_frame, end_frame = next(data_iter)
        frame_vecs = frame_vecs.to(trainer.device)
        ques = ques.to(trainer.device)
        # Forward pass
        p1, p2 = trainer.model(frame_vecs, ques)
        y1, y2 = start_frame.to(trainer.device), end_frame.to(trainer.device)
        print(p1.shape, p2.shape, y1.shape, y2.shape)
        loss1 = F.nll_loss(p1, y1, reduction='mean')  # 'elementwise_mean' is the deprecated spelling
        loss2 = F.nll_loss(p2, y2, reduction='mean')
        loss = (loss1 + loss2) / 2
        # Backward and optimize
        trainer.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_value_(trainer.model.parameters(), 1)
        trainer.optimizer.step()
        print(loss.item())
    elif config.mode == "test":
        trainer = Trainer(config)
        trainer.evaluate()
    else:
        print("Unknown mode")
        exit(0)
def makecsv(file, model, loadfile):
    cuda = False
    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
    data_loader = Loader(c.FILE_TRAIN_LABELED_AUG, c.FILE_TRAIN_UNLABELED, c.FILE_TEST,
                         'data/test-labeled.p', kwargs)
    test_loader = data_loader.getTest()
    test_actual = data_loader.getValidation()
    label_predict = np.array([])
    mnist_model = model
    if loadfile:
        mnist_model = torch.load(model)
    mnist_model.eval()
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)  # legacy pre-0.4 inference idiom
        output = mnist_model(data)
        pred = output.data.max(1)[1]  # index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
        label_predict = np.concatenate((label_predict, pred.numpy().reshape(-1)))
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
    predict_label = pd.DataFrame(label_predict, columns=['label'], dtype=int)
    predict_label.reset_index(inplace=True)
    predict_label.rename(columns={'index': 'ID'}, inplace=True)
    filename = 'predictions/' + file + "-labeled.csv"
    predict_label.to_csv(filename, index=False)

    label_predict = np.array([])
    correct = 0
    for data, target in test_actual:
        data, target = Variable(data, volatile=True), Variable(target)
        output = mnist_model(data)
        pred = output.data.max(1)[1]  # index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
        label_predict = np.concatenate((label_predict, pred.numpy().reshape(-1)))
    print('\nValidation set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_actual.dataset), 100. * correct / len(test_actual.dataset)))
    predict_label = pd.DataFrame(label_predict, columns=['label'], dtype=int)
    predict_label.reset_index(inplace=True)
    predict_label.rename(columns={'index': 'ID'}, inplace=True)
    filename = 'predictions/' + file + "-unlabeled.csv"
    predict_label.to_csv(filename, index=False)
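# The two loops above share the same legacy Variable(volatile=True) idiom and
# differ only in their loader. A minimal sketch of the same evaluation on
# PyTorch >= 0.4, where torch.no_grad() replaces volatile and the duplicated
# loop is factored into a helper (evaluate_loader is a hypothetical name, not
# part of the original code):
import numpy as np
import torch

def evaluate_loader(model, loader):
    model.eval()
    preds, correct = [], 0
    with torch.no_grad():  # disables autograd; the modern replacement for volatile=True
        for data, target in loader:
            output = model(data)
            pred = output.argmax(dim=1)  # index of the max log-probability
            correct += pred.eq(target).sum().item()
            preds.append(pred.cpu().numpy().reshape(-1))
    return np.concatenate(preds), correct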
def getDataLoaderDict(idxModel):  # idxModel is a tuple of model indices
    print("\nStarted Preprocessing")
    trainDataLoaderDict = {}
    testDataLoaderDict = {}
    for idx in idxModel:
        print("-Preprocessing model{} dataloader".format(idx))
        loader = Loader(idx)
        trainLoader, testLoader = loader.getLoader()
        trainDataLoaderDict[idx] = trainLoader
        testDataLoaderDict[idx] = testLoader
        print("-Completed Preprocessing model{} dataloader\n".format(idx))
    print("All Completed Preprocessing\n")
    return trainDataLoaderDict, testDataLoaderDict
def main(_):
    print('begin loading data...')
    loader = Loader(FLAGS.mincount, FLAGS.maxlen, full_train=False)
    print('finished loading data\n')
    print('begin building model...')
    # dataloader, embedding_size, max_epoch, learning_rate, keep_prob
    model = Model(loader, embedding_size=FLAGS.embedding_size, max_epoch=FLAGS.max_epoch,
                  learning_rate=FLAGS.learning_rate, keep_prob=FLAGS.keep_prob)
    print('finished building model\n')
    # model.train(full_train=True)
    # model.save_doc_vector()
    model.many_test()
# read GloVe word vectors
with open('/home/student/glove/glove.6B.100d.txt', 'r') as f:
    lines = f.readlines()
dictionary = {}
weight_matrix = []
for i, line in enumerate(lines):
    word = line.split()
    dictionary[word[0]] = i
    weight_matrix.append([float(word[j]) for j in range(1, len(word))])
weight_matrix = np.array(weight_matrix)
# ------------------------------------------------------------------------------- #
# read data
train_data = Loader(0, dictionary)
valid_data = Loader(1, dictionary)
test_data = Loader(2, dictionary)
train_data = DataLoader(train_data, batch_size=128, shuffle=True)
valid_data = DataLoader(valid_data, batch_size=128, shuffle=False)
test_data = DataLoader(test_data, batch_size=128, shuffle=False)
# ------------------------------------------------------------------------------ #
model = SentenceCompression(100, 200, 50, 2, weight_matrix, len(dictionary)).cuda()
lr = 0.0005
num_epoch = 20
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=lr)  # any further optimizer arguments are not shown in the source
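# Quick sanity check (hypothetical, not part of the original script): dictionary
# maps a token to its row in weight_matrix, so the two structures must stay
# index-aligned. Assuming the GloVe file parsed above:
idx = dictionary.get('the')
if idx is not None:
    print(weight_matrix[idx].shape)  # expected: (100,) for glove.6B.100d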
import scipy.sparse as sp
import numpy as np
from dataloader import Loader
from scipy.sparse import csr_matrix

loader = Loader()
R = loader.UserItemNet.tolil()
twohop = 0.004

adj_user = R.T.todok()
print("generating 2")
rowsum_user = np.array(adj_user.sum(axis=0))
D_user = np.power(rowsum_user, -0.5).flatten()
D_user[np.isinf(D_user)] = 0
Dmat_user = sp.diags(D_user)
print("generating 3")
adj_item = R.todok()
print("generating 4")
rowsum_item = np.array(adj_item.sum(axis=0))
D_item = np.power(rowsum_item, -0.5).flatten()
D_item[np.isinf(D_item)] = 0
Dmat_item = sp.diags(D_item)
print("generating 5")
# symmetric normalization: D^{-1/2} A D^{-1/2}
norm_user = Dmat_item.dot(adj_user).dot(Dmat_user)
norm_item = Dmat_user.dot(adj_item).dot(Dmat_item)

def sparsify_propagation(adj, hop_thres):
    adj_valid = (adj > hop_thres)
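    # (Hypothetical completion: the source cuts off inside this function. A
    # minimal sketch, assuming the goal is to drop propagation weights at or
    # below hop_thres and return the sparsified matrix in CSR form.)
    adj_sparse = adj.multiply(adj_valid)  # zero out weak two-hop links, keep sparsity
    return csr_matrix(adj_sparse)

# Possible usage with the threshold defined above (assumption):
# norm_item_sparse = sparsify_propagation(norm_item, twohop)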
"word2vec": "data/word2vec.bin", "is_origin_dataset": True, "train_json": "data/activity-net/train.json", "val_json": "data/activity-net/val_1.json", "test_json": "data/activity-net/val_2.json", "train_data": "data/activity-net/train_data.json", "val_data": "data/activity-net/val_data.json", "test_data": "data/activity-net/test_data.json", "feature_path": "data/activity-c3d", "feature_path_tsn": "data/tsn_score" } device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') word2vec = KeyedVectors.load_word2vec_format(config["word2vec"], binary=True) train_dataset = Loader(config, config['train_data'], word2vec, flag=True) val_dataset = Loader(config, config['val_data'], word2vec) test_dataset = Loader(config, config['test_data'], word2vec) train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True) val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False) test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False) d_model = config['d_model'] d_ff = config['d_ff'] num_heads = config['num_heads']
batch_size = 2
device = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = device

# main
if __name__ == '__main__':
    # create model
    model = Model()
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr)
    loss = torch.nn.MSELoss()
    visdom_server = Visdom(port=8097)

    # create dataloaders
    train_set = Loader(mode='train')
    val_set = Loader(mode='test')
    data_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                              shuffle=True, num_workers=1)
    val_data_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size,
                                                  shuffle=True, num_workers=1)

    # training
    print(f'The number of training batches: {len(data_loader)}')
    print(f'The number of testing batches: {len(val_data_loader)}')
    for epoch in range(epochs):
                    action='store',
                    default='model.p',
                    help='modelname')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
print(args)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
data_loader = Loader(c.FILE_TRAIN_LABELED, c.FILE_TRAIN_UNLABELED,
                     c.FILE_VALIDATION, c.FILE_TEST, kwargs)
train_loader = data_loader.getLabeledtrain()
unlabeled_train_loader = data_loader.getUnlabeledtrain()
valid_loader = data_loader.getValidation()

class AEMnist(nn.Module):
    def __init__(self):
        super(AEMnist, self).__init__()
        self.supervised = False
        # ENCODER
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 200)
        self.fc2 = nn.Linear(200, 50)
from train import *
from eval import *
from predict import predict
import paddle.distributed as dist
from paddle.distributed import fleet

def cprint(words: str):
    print(f"\033[0;30;43m{words}\033[0m")

if __name__ == '__main__':
    print(config.config)
    train_dataset = Loader(path=config.dataset)
    name_datasets = config.dataset.split('/')[-1]
    Recmodel = NGCF(config.config, train_dataset)
    if config.config['multigpu']:
        print('using fleet multigpu training', Recmodel)
        dist.init_parallel_env()
        Recmodel = paddle.DataParallel(Recmodel)
    if config.config['multicpu']:
        fleet.init(is_collective=True)
        # NOTE: `optimizer` is referenced here before it is created anywhere in
        # this snippet; it must be constructed before this branch runs.
        optimizer = fleet.distributed_optimizer(optimizer)
        Recmodel = fleet.distributed_model(Recmodel)
        print('using fleet multicpu training', Recmodel)
    Neg_k = 1
    bpr = BPRLoss(Recmodel, config.config)
    f = open(f'logger/train_logger_{name_datasets}.txt', 'w')
    f_test = open(f'logger/test_logger_{name_datasets}.txt', 'w')
    for result in pre_results:
        results['recall'] += result['recall']
        results['precision'] += result['precision']
        results['ndcg'] += result['ndcg']
    results['recall'] /= float(len(users))
    results['precision'] /= float(len(users))
    results['ndcg'] /= float(len(users))
    # results['auc'] = np.mean(auc_record)
    if multicore == 1:
        pool.close()
    print(results)
    return results

def cprint(words: str):
    print(f"\033[0;30;43m{words}\033[0m")

if __name__ == '__main__':
    train_dataset = Loader(path=world.dataset)
    Recmodel = NGCF(world.config, train_dataset)
    Neg_k = 1
    bpr = BPRLoss(Recmodel, world.config)
    f = open(f'train_logger_{world.dataset}.txt', 'w')
    f_test = open(f'test_logger_{world.dataset}.txt', 'w')
    for epoch in range(world.TRAIN_epochs):
        if epoch % 10 == 0:
            cprint("[TEST]")
            result = Test(train_dataset, Recmodel, epoch, world.config['multicore'])
            print(epoch, result, file=f_test, flush=True)
        output_information = BPR_train_original(train_dataset, Recmodel, bpr, epoch,
                                                neg_k=Neg_k, w=None)
        log_output = f'EPOCH[{epoch+1}/{world.TRAIN_epochs}] {output_information}'
        print(log_output)
        print(log_output, file=f, flush=True)
    f.close()
help="available datasets: [lastfm, gowalla, yelp2018, amazon-book]") parser.add_argument( '--path', type=str, default="./checkpoints", help="path to save weights") parser.add_argument( '--topks', nargs='?', default="[20]", help="@k test list") parser.add_argument('--epochs', type=int, default=1000) parser.add_argument('--seed', type=int, default=2020, help='random seed') return parser.parse_args() if __name__ == '__main__': args = parse_args() dataset = Loader(args, path=args.dataset) model = NGCF(args, dataset) neg = 1 optim = paddle.optimizer.Adam( parameters=model.parameters(), learning_rate=args.lr) for epoch in range(args.epochs): if epoch % 10 == 0: results = test(dataset, model, epoch, args) print(results) log = train(dataset, model, epoch, optim, args, neg_k=neg, w=None) print(f'EPOCH[{epoch+1}/{args.epochs}] {log}')
from yamlparams.utils import Hparam
import sys

from metrics import mean_average_presision_k, hitrate_k, novelty, coverage
from dataloader import Loader
from preprocessor import Preprocessor
from model import TopPopular

# Read config
if len(sys.argv) < 2:
    raise AttributeError('Use config name to define model config')
cfg_path = sys.argv[1]  # e.g. 'books_big_setting.yml'
print('Using config ' + cfg_path)
config = Hparam(cfg_path)

loader = Loader(config.path)
preprocessor = Preprocessor()

print('Reading data')
df = loader.get_views()

print('Preprocessing')
# train_df = preprocessor.filter_zeros(train_df)
df = preprocessor.filter_lazy_users(df, 0)
train_df, test_df = loader.split_train_test(df, config.min_user_views,
                                            config.testing.samples)
train_df, test_df = preprocessor.filter_not_in_test_items(train_df, test_df)
preprocessor.build_mappers(train_df.append(test_df))
train_df.user_id = train_df.user_id.apply(preprocessor.get_user_ix)
    'num_workers': args.num_workers,
    'pin_memory': True
} if args.cuda else {}
if args.cuda:
    torch.cuda.manual_seed(args.seed)

captioning_dataset = json.load(open(args.captioning_dataset_path, "rb"))
all_arts = pickle.load(open(args.articles_metadata, 'rb'))
art2id = {}
# ipdb.set_trace()
for i, art in enumerate(all_arts):
    art2id[art] = i
p = open(args.fake_articles, 'r')
fake_articles = [json.loads(l) for i, l in enumerate(p)]

train_loader = DataLoader(Loader(args, 'train', captioning_dataset, art2id, fake_articles),
                          batch_size=args.batch_size, shuffle=True, collate_fn=collate)
val_loader = DataLoader(Loader(args, 'valid', captioning_dataset, art2id, fake_articles),
                        batch_size=args.batch_size, shuffle=False, collate_fn=collate, **kwargs)
test_loader = DataLoader(Loader(args, 'test', captioning_dataset, art2id, fake_articles),
                         batch_size=args.batch_size, shuffle=False, collate_fn=collate, **kwargs)
def main():
    parser = Parser()
    config = parser.config
    for param, value in config.__dict__.items():
        print(param + '.' * (50 - len(param) - len(str(value))) + str(value))
    print()

    # Load previous checkpoint if it exists
    checkpoint = load_latest(config)

    # Create model
    model = load_model(config, checkpoint)

    # print number of parameters in the model
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    print('Total number of parameters: \33[91m{}\033[0m'.format(n_params))

    # Load train and test data
    train_loader, valid_loader, test_loader = Loader(config)
    n_batches = int(len(train_loader.dataset) / config.batch_size)

    # save the configuration
    with open(os.path.join(config.save, 'log.txt'), 'w') as file:
        json.dump('json_stats: ' + str(config.__dict__), file)

    # Instantiate the criterion, optimizer and learning rate scheduler
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')  # size_average=True is the deprecated spelling
    optimizer = torch.optim.SGD(model.parameters(), lr=config.LR,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay,
                                nesterov=config.nesterov)
    start_time = 0
    if checkpoint is not None:
        start_time = checkpoint['time'] + 1  # resume the time-budget tracker checked below
        optimizer.load_state_dict(checkpoint['optimizer'])

    if config.lr_shape == 'multistep':
        scheduler = MultiStepLR(optimizer, milestones=[81, 122], gamma=0.1)
    elif config.lr_shape == 'cosine':
        if checkpoint is not None:
            scheduler = checkpoint['scheduler']
        else:
            scheduler = CosineAnnealingRestartsLR(optimizer, 1, config.T_e,
                                                  T_mul=config.T_mul)

    # The trainer handles the training loop and evaluation on the validation set
    trainer = Trainer(model, criterion, config, optimizer, scheduler)

    epoch = 1
    while True:
        # Train for a single epoch
        train_top1, train_loss, stop_training = trainer.train(epoch, train_loader)

        # Run model on the validation and test set
        valid_top1 = trainer.evaluate(epoch, valid_loader, 'valid')
        test_top1 = trainer.evaluate(epoch, test_loader, 'test')

        current_time = time.time()
        results = {
            'epoch': epoch,
            'time': current_time,
            'train_top1': train_top1,
            'valid_top1': valid_top1,
            'test_top1': test_top1,
            'train_loss': float(train_loss.data),
        }
        with open(os.path.join(config.save, 'results.txt'), 'w') as file:
            json.dump(str(results), file)
            file.write('\n')
        print('==> Finished epoch %d (budget %.3f): %7.3f (train) %7.3f (validation) %7.3f (test)'
              % (epoch, config.budget, train_top1, valid_top1, test_top1))
        if stop_training:
            break
        epoch += 1

    if start_time >= config.budget:
        trainer.evaluate(epoch, test_loader, 'test')
    else:
        save_checkpoint(int(config.budget), trainer.model, trainer.optimizer,
                        trainer.scheduler, config)
def setUp(self):
    # Wipe DB
    apiproxy_stub_map.apiproxy.GetStub('datastore_v3').Clear()
    # load default data
    l = Loader()
    l.loadDatabase()
                    default=100,
                    metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
print(args)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
data_loader = Loader('data/train_labeled_aug.p', c.FILE_TRAIN_UNLABELED,
                     c.FILE_VALIDATION, c.FILE_TEST, kwargs)
train_loader = data_loader.getLabeledtrain()
unlabeled_train_loader = data_loader.getUnlabeledtrain()
valid_loader = data_loader.getValidation()

model = Ladder()
if args.cuda:
    model.cuda()

l2loss = torch.nn.BCELoss()  # alternative: torch.nn.L1Loss(); BCELoss expects sigmoid outputs
# l2_2 = torch.nn.L1Loss()
nllloss = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
import torch
import pickle
import numpy as np
import pandas as pd
import torch.nn as nn
import constants as c
from dataloader import Loader
from torch.autograd import Variable

cuda = torch.cuda.is_available()
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
data_loader = Loader(c.FILE_TRAIN_LABELED_AUG, c.FILE_TRAIN_UNLABELED, c.FILE_TEST,
                     'data/test-labeled.p', kwargs)
test_loader = data_loader.getTest()
test_actual = data_loader.getValidation()
label_predict = np.array([])

def callval(mnist_model, test_loader, test_actual, model, file):
    label_predict = np.array([])
    loadfile = True
    if loadfile:
        mnist = torch.load(model)
        mnist_model.load_state_dict(mnist)
    correct = 0
    if torch.cuda.is_available():
        mnist_model.cuda()
def read_data(self):
    self.loader = Loader()
    self.loader.read_data()
    histogram_freq=1,
    write_graph=True,
    write_images=False)
RP = ReduceLROnPlateau(monitor='val_loss',
                       factor=0.1,
                       patience=3,
                       verbose=0,
                       mode='auto')
callbacks = [ES, TB, RP]

# create data
train_set = Loader(mode='train')
data_loader = torch.utils.data.DataLoader(train_set, batch_size=300,
                                          shuffle=False, num_workers=1)
for i, (x, y) in enumerate(data_loader):
    x = x.data.numpy()
    y = y.data.numpy()
# NOTE: x and y hold only the last batch produced by the loop above

# main
if __name__ == '__main__':
    model = simple_s2s()
    model.fit(x, y,
              epochs=2000,
              batch_size=1)  # further arguments (likely callbacks=callbacks) are not shown in the source