Пример #1
0
                            sampler=sampler,
                            collate_fn=movie_collate_fn)
    else:
        sampler = None
        # noinspection PyTypeChecker
        loader = DataLoader(dataset,
                            batch_size=args.batch,
                            sampler=sampler,
                            shuffle=shuffle,
                            collate_fn=movie_collate_fn)
    return loader


if __name__ == '__main__':
    data_dir = os.path.join(
        os.getcwd(), '../data') if not is_mac_or_pycharm() else os.path.join(
            os.getcwd(), '../data_sample')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        default=data_dir,
        type=str,
        required=False,
        help='a data directory which have downloaded, corpus text, vocab files.'
    )
    parser.add_argument("--corpus",
                        default=os.path.join(data_dir, "kowiki.txt"),
                        type=str,
                        required=False,
                        help="input text file")
Пример #2
0
                    train_model.module.albert.save(best_epoch, best_loss,
                                                   args.pretrain_save)
                else:
                    train_model.albert.save(best_epoch, best_loss,
                                            args.pretrain_save)

                pbar.set_postfix_str(
                    f"best epoch: {best_epoch}, loss: {best_loss:.4f}")

    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    data_dir = os.path.join(
        os.getcwd(), '../data') if not is_mac_or_pycharm() else os.path.join(
            os.getcwd(), '../data_sample')
    data_dir = os.path.abspath(data_dir)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        default=data_dir,
        type=str,
        required=False,
        help='a data directory which have downloaded, corpus text, vocab files.'
    )
    parser.add_argument('--vocab',
                        default=os.path.join(data_dir, 'kowiki.model'),
                        type=str,
                        required=False,
Пример #3
0
                    if best_epoch + 5 < epoch:  # early stop
                        break

            pbar.update()
            break
        print(f'total_memory: {torch.cuda.get_device_properties(rank).total_memory / (1024 * 1024):.3f} MB')

    if master and args.wandb:
        wandb.save(args.name)
    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='../data' if not is_mac_or_pycharm() else '../data_sample', type=str, required=False,
                        help='a data directory which have downloaded, corpus text, vocab files.')
    parser.add_argument('--vocab', default='kowiki.model', type=str, required=False,
                        help='vocab file')
    parser.add_argument('--config', default='config_half.json', type=str, required=False,
                        help='config file')
    parser.add_argument('--epoch', default=20 if not is_mac_or_pycharm() else 4, type=int, required=False,
                        help='max epoch')
    parser.add_argument('--gradient_accumulation', default=1, type=int, required=False,
                        help='real batch size = gradient_accumulation_steps * batch')
    parser.add_argument('--batch', default=256 if not is_mac_or_pycharm() else 4, type=int, required=False,
                        help='batch')  # batch=256 for Titan XP, batch=512 for V100
    parser.add_argument('--gpu', default=None, type=int, required=False,
                        help='GPU id to use.')
    parser.add_argument('--seed', type=int, default=42, required=False,
                        help='random seed for initialization')
Пример #4
0
            if master and best_score < score:
                best_epoch, best_loss, best_score = epoch, loss, score
                if isinstance(train_model, DistributedDataParallel):
                    train_model.module.save(best_epoch, best_loss, best_score, args.save)
                else:
                    train_model.save(best_epoch, best_loss, best_score, args.save)

                pbar.set_postfix_str(f"best epoch: {best_epoch}, loss: {best_loss:.4f}, accuracy: {best_score:.3f}")

    if 1 < args.n_gpu:
        destroy_process_group()


if __name__ == '__main__':
    data_dir = os.path.join(os.getcwd(), '../data') if not is_mac_or_pycharm() else os.path.join(os.getcwd(), '../data_sample')
    data_dir = os.path.abspath(data_dir)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default=data_dir, type=str, required=False,
                        help='a data directory which have downloaded, corpus text, vocab files.')
    parser.add_argument("--vocab", default=os.path.join(data_dir, "kowiki.model"), type=str, required=False,
                        help="vocab file")
    parser.add_argument("--train", default=os.path.join(data_dir, "ratings_train.json"), type=str, required=False,
                        help="input train file")
    parser.add_argument("--test", default=os.path.join(data_dir, "ratings_test.json"), type=str, required=False,
                        help="input test file")
    parser.add_argument("--pretrain_save", default='bert.pth', type=str, required=False,
                        help="save file")
    parser.add_argument("--save", default="save_best.pth", type=str, required=False,
                        help="save file")
Пример #5
0
    wget.download(
        "https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt",
        data_dir)
    print()
    print("download data/ratings_test.txt")
    wget.download(
        "https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt",
        data_dir)
    print()


if __name__ == "__main__":
    # CLI entry point: resolve the data directory and download the NSMC
    # ratings files into it if they are not already present.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        default="data" if not is_mac_or_pycharm() else "data_sample",
        type=str,
        required=False,
        help="a data directory which have downloaded, corpus text, vocab files."
    )
    args = parser.parse_args()
    # Make the directory absolute relative to the current working directory.
    args.data_dir = os.path.join(os.getcwd(), args.data_dir)
    print(args)

    # BUG FIX: the original created a hardcoded "data" directory, so when
    # args.data_dir pointed elsewhere (e.g. the "data_sample" default on
    # mac/PyCharm) the actual target directory was never created and the
    # downloads below would fail. Create the real target instead;
    # exist_ok=True makes this idempotent.
    os.makedirs(args.data_dir, exist_ok=True)

    # Download only when either converted ratings file is missing.
    if not os.path.isfile(os.path.join(
            args.data_dir, "ratings_train.json")) or not os.path.isfile(
                os.path.join(args.data_dir, "ratings_test.json")):
        download_data(args.data_dir)