                            sampler=sampler,
                            collate_fn=movie_collate_fn)
    else:
        sampler = None
        # noinspection PyTypeChecker
        loader = DataLoader(dataset, batch_size=args.batch, sampler=sampler, shuffle=shuffle,
                            collate_fn=movie_collate_fn)
    return loader


if __name__ == '__main__':
    data_dir = os.path.join(os.getcwd(), '../data') if not is_mac_or_pycharm() \
        else os.path.join(os.getcwd(), '../data_sample')

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default=data_dir, type=str, required=False,
                        help='a data directory containing the downloaded corpus text and vocab files.')
    parser.add_argument("--corpus", default=os.path.join(data_dir, "kowiki.txt"), type=str, required=False,
                        help="input text file")
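# A minimal sketch of what a padding collate function such as movie_collate_fn
# typically does for this loader; the (label, token_ids) sample layout and the
# pad id 0 are assumptions here, not the definition actually used above.
import torch
from torch.nn.utils.rnn import pad_sequence

def movie_collate_fn_sketch(batch):
    """Pad variable-length token id sequences into batch tensors (hypothetical example)."""
    labels = torch.tensor([label for label, _ in batch], dtype=torch.long)
    inputs = pad_sequence([torch.tensor(ids, dtype=torch.long) for _, ids in batch],
                          batch_first=True, padding_value=0)  # 0 assumed to be the <pad> id
    return labels, inputs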
                train_model.module.albert.save(best_epoch, best_loss, args.pretrain_save)
            else:
                train_model.albert.save(best_epoch, best_loss, args.pretrain_save)
        pbar.set_postfix_str(f"best epoch: {best_epoch}, loss: {best_loss:.4f}")

    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    data_dir = os.path.join(os.getcwd(), '../data') if not is_mac_or_pycharm() \
        else os.path.join(os.getcwd(), '../data_sample')
    data_dir = os.path.abspath(data_dir)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default=data_dir, type=str, required=False,
                        help='a data directory containing the downloaded corpus text and vocab files.')
    parser.add_argument('--vocab', default=os.path.join(data_dir, 'kowiki.model'), type=str, required=False,
                        help='vocab file')
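# The save calls above imply a model method with the signature save(epoch, loss, path).
# A minimal checkpoint-writer sketch under that assumption; the repo's actual method
# may persist additional fields:
import torch
from torch import nn

class CheckpointMixin(nn.Module):
    def save(self, epoch, loss, path):
        """Write the best epoch, its loss, and the model weights to a single .pth file."""
        torch.save({'epoch': epoch, 'loss': loss, 'state_dict': self.state_dict()}, path)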
            if best_epoch + 5 < epoch:  # early stop
                break
            pbar.update()

    print(f'total_memory: {torch.cuda.get_device_properties(rank).total_memory / (1024 * 1024):.3f} MB')

    if master and args.wandb:
        wandb.save(args.name)

    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='../data' if not is_mac_or_pycharm() else '../data_sample', type=str, required=False,
                        help='a data directory containing the downloaded corpus text and vocab files.')
    parser.add_argument('--vocab', default='kowiki.model', type=str, required=False,
                        help='vocab file')
    parser.add_argument('--config', default='config_half.json', type=str, required=False,
                        help='config file')
    parser.add_argument('--epoch', default=20 if not is_mac_or_pycharm() else 4, type=int, required=False,
                        help='max epoch')
    parser.add_argument('--gradient_accumulation', default=1, type=int, required=False,
                        help='real batch size = gradient_accumulation * batch')
    parser.add_argument('--batch', default=256 if not is_mac_or_pycharm() else 4, type=int, required=False,
                        help='batch')  # batch=256 for Titan XP, batch=512 for V100
    parser.add_argument('--gpu', default=None, type=int, required=False,
                        help='GPU id to use.')
    parser.add_argument('--seed', type=int, default=42, required=False,
                        help='random seed for initialization')
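# The --gradient_accumulation flag above implies an update pattern like the following
# (a minimal sketch, not the repo's actual training loop): the loss of each micro-batch
# is scaled down by the accumulation factor and the optimizer only steps every
# accum_steps batches, so the effective batch size is gradient_accumulation * batch.
def accumulation_step_sketch(model, optimizer, criterion, loader, accum_steps):
    model.train()
    optimizer.zero_grad()
    for i, (labels, inputs) in enumerate(loader):
        loss = criterion(model(inputs), labels) / accum_steps  # scale so gradients average
        loss.backward()
        if (i + 1) % accum_steps == 0:
            optimizer.step()
            optimizer.zero_grad()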
        if master and best_score < score:
            best_epoch, best_loss, best_score = epoch, loss, score
            if isinstance(train_model, DistributedDataParallel):
                train_model.module.save(best_epoch, best_loss, best_score, args.save)
            else:
                train_model.save(best_epoch, best_loss, best_score, args.save)
        pbar.set_postfix_str(f"best epoch: {best_epoch}, loss: {best_loss:.4f}, accuracy: {best_score:.3f}")

    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    data_dir = os.path.join(os.getcwd(), '../data') if not is_mac_or_pycharm() \
        else os.path.join(os.getcwd(), '../data_sample')
    data_dir = os.path.abspath(data_dir)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default=data_dir, type=str, required=False,
                        help='a data directory containing the downloaded corpus text and vocab files.')
    parser.add_argument("--vocab", default=os.path.join(data_dir, "kowiki.model"), type=str, required=False,
                        help="vocab file")
    parser.add_argument("--train", default=os.path.join(data_dir, "ratings_train.json"), type=str, required=False,
                        help="input train file")
    parser.add_argument("--test", default=os.path.join(data_dir, "ratings_test.json"), type=str, required=False,
                        help="input test file")
    parser.add_argument("--pretrain_save", default='bert.pth', type=str, required=False,
                        help="pretrained model file (output of pretraining)")
    parser.add_argument("--save", default="save_best.pth", type=str, required=False,
                        help="best model save file")
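# The score tracked above is an accuracy. A minimal evaluation sketch, assuming the
# model returns class logits and the loader yields (labels, inputs) batches; the
# names here are illustrative, not the repo's actual eval routine:
import torch

def eval_accuracy_sketch(model, loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for labels, inputs in loader:
            logits = model(inputs.to(device))
            correct += (logits.argmax(dim=-1).cpu() == labels).sum().item()
            total += labels.size(0)
    return correct / max(total, 1)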
    wget.download("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt", data_dir)
    print()
    print("download data/ratings_test.txt")
    wget.download("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt", data_dir)
    print()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default="data" if not is_mac_or_pycharm() else "data_sample", type=str, required=False,
                        help="a data directory containing the downloaded corpus text and vocab files.")
    args = parser.parse_args()
    args.data_dir = os.path.join(os.getcwd(), args.data_dir)
    print(args)

    if not os.path.exists(args.data_dir):  # create the selected directory, not a hard-coded "data"
        os.makedirs(args.data_dir)
    if not os.path.isfile(os.path.join(args.data_dir, "ratings_train.json")) \
            or not os.path.isfile(os.path.join(args.data_dir, "ratings_test.json")):
        download_data(args.data_dir)
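# The training scripts read ratings_*.json, while NSMC ships tab-separated .txt files
# ("id<TAB>document<TAB>label" with a header line), so a conversion step like this is
# implied. A sketch only: the JSON field names are assumptions, not the repo's schema.
import json

def nsmc_txt_to_json_sketch(txt_path, json_path):
    with open(txt_path, encoding="utf-8") as fin, open(json_path, "w", encoding="utf-8") as fout:
        next(fin)  # skip the NSMC header row
        for line in fin:
            _id, doc, label = line.rstrip("\n").split("\t")
            fout.write(json.dumps({"doc": doc, "label": int(label)}, ensure_ascii=False) + "\n")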