def run(*argv):
    """Parse command-line style arguments and start the seq2seq model.

    The arguments are parsed with ``argparse``, copied (together with a set
    of fixed hyper-parameters) into a ``Storage`` object and handed to
    ``main.main``.

    Args:
        *argv: Argument strings as they would appear on the command line,
            without the program name. Because an (possibly empty) tuple is
            always passed to ``parse_args``, ``sys.argv`` is never consulted;
            calling ``run()`` uses every default.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid arguments or ``--help``.
    """
    import argparse
    import random
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seq2seq model with GRU encoder and decoder. '
                    'Attention, beamsearch, dropout and batchnorm is supported.')
    args = Storage()

    parser.add_argument(
        '--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
             'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument(
        '--restore', type=str, default=None,
        help='Checkpoints name to load. '
             '"NAME_last" for the last checkpoint of model named NAME. '
             '"NAME_best" means the best checkpoint. '
             'You can also use "last" and "best", by default use last model you run. '
             'Attention: "NAME_last" and "NAME_best" are not guaranteed to work '
             'when 2 models with same name run in the same time. '
             '"last" and "best" are not guaranteed to work when 2 models run in the same time. '
             "Default: None (don't load anything)")
    parser.add_argument(
        '--mode', type=str, default="train",
        help='"train" or "test". Default: train')

    parser.add_argument('--eh_size', type=int, default=384,
                        help='Size of encoder GRU')
    parser.add_argument('--dh_size', type=int, default=200,
                        help='Size of decoder GRU')
    parser.add_argument(
        '--droprate', type=float, default=0,
        help="The probability to be zeroed in dropout. "
             "0 indicates for don't use dropout")
    parser.add_argument('--batchnorm', action='store_true',
                        help='Use batchnorm')
    parser.add_argument(
        '--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'],
        default='beam',
        help='The decode strategy when freerun. '
             'Choices: max, sample, gumbel(=sample), '
             'samplek(sample from topk), beam(beamsearch). Default: beam')
    parser.add_argument(
        '--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument(
        '--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. '
             'The penalty will get larger when this becomes smaller.')

    parser.add_argument('--dataset', type=str, default='OpenSubtitles',
                        help='Dataloader class. Default: OpenSubtitles')
    parser.add_argument(
        '--datapath', type=str, default='resources://OpenSubtitles',
        help='Directory for data set. Default: resources://OpenSubtitles')
    parser.add_argument('--epoch', type=int, default=100,
                        help="Epoch for training. Default: 100")
    parser.add_argument(
        '--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. "
             "Default: Glove")
    parser.add_argument(
        '--wvpath', type=str, default="resources://Glove300d",
        help="Resources of pretrained wordvector. "
             "Default: resources://Glove300d")
    parser.add_argument('--bert_model', type=str, default="bert-base-uncased",
                        help="Name of bert model. Default: bert-base-uncased")
    parser.add_argument('--bert_vocab', type=str, default="bert-base-uncased",
                        help="Name of bert vocab. Default: bert-base-uncased")

    parser.add_argument(
        '--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir', type=str, default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir', type=str, default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache', action='store_true',
        help='Use cache for speeding up load data and wordvec. '
             '(It may cause problems when you switch dataset.)')
    # The seed used to be hard-coded to 0; the default keeps that behaviour.
    parser.add_argument('--seed', type=int, default=0,
                        help='Specify random seed. Default: 0')

    cargs = parser.parse_args(argv)

    # General settings taken from the command line.
    # Edit the right-hand sides to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.bert_model = cargs.bert_model
    args.bert_vocab = cargs.bert_vocab
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # Checkpoint-restoring options; these have no command-line flag.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    # Model and training settings: a mix of fixed values and values
    # forwarded from the command line.
    args.batch_per_epoch = 500
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.dh_size = cargs.dh_size
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm
    args.lr = 1e-3
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    # Seed Python's RNG before handing control to the model code.
    args.seed = cargs.seed
    random.seed(cargs.seed)

    from main import main

    main(args, load_exclude_set, restoreCallback)
def run(*argv):
    """Parse command-line style arguments and start the GRU language model.

    The arguments are parsed with ``argparse``, copied (together with a set
    of fixed hyper-parameters) into a ``Storage`` object, the ``random``,
    ``torch`` and ``numpy`` RNGs are seeded, and ``main.main`` is called.

    Args:
        *argv: Argument strings as they would appear on the command line,
            without the program name. Because an (possibly empty) tuple is
            always passed to ``parse_args``, ``sys.argv`` is never consulted;
            calling ``run()`` uses every default.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid arguments or ``--help``.
    """
    import argparse
    import random
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A language model with GRU. '
                    'Attention, beamsearch, dropout and batchnorm is supported.')
    args = Storage()

    parser.add_argument(
        '--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
             'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument(
        '--restore', type=str, default=None,
        help='Checkpoints name to load. '
             '"NAME_last" for the last checkpoint of model named NAME. '
             '"NAME_best" means the best checkpoint. '
             'You can also use "last" and "best", by default use last model you run. '
             'Attention: "NAME_last" and "NAME_best" are not guaranteed to work '
             'when 2 models with same name run in the same time. '
             '"last" and "best" are not guaranteed to work when 2 models run in the same time. '
             "Default: None (don't load anything)")
    parser.add_argument(
        '--mode', type=str, default="train",
        help='"train" or "test". Default: train')

    parser.add_argument('--dh_size', type=int, default=200,
                        help='Size of decoder GRU')
    parser.add_argument(
        '--droprate', type=float, default=0,
        help="The probability to be zeroed in dropout. "
             "0 indicates for don't use dropout")
    parser.add_argument(
        '--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'],
        default='samplek',
        help='The decode strategy when freerun. '
             'Choices: max, sample, gumbel(=sample), '
             'samplek(sample from topk), beam(beamsearch). Default: samplek')
    parser.add_argument('--batchnorm', action='store_true',
                        help='Use batchnorm')
    parser.add_argument(
        '--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument(
        '--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. '
             'The penalty will get larger when this becomes smaller.')
    parser.add_argument('--temperature', type=float, default=1,
                        help='Temperature. Default: 1')

    parser.add_argument(
        '--dataid', type=str, default='resources://MSCOCO',
        help='Resources/path for data set. Default: resources://MSCOCO')
    parser.add_argument('--epoch', type=int, default=100,
                        help="Epoch for training. Default: 100")
    # The help text previously claimed 1500 although the default is 500.
    parser.add_argument('--batch_per_epoch', type=int, default=500,
                        help="Batches per epoch. Default: 500")
    parser.add_argument(
        '--wvid', type=str, default="resources://Glove300d",
        help="Resources/path for pretrained wordvector. "
             "Default: resources://Glove300d")

    parser.add_argument(
        '--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir', type=str, default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir', type=str, default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache', action='store_true',
        help='Use cache for speeding up load data and wordvec. '
             '(It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Specify random seed. Default: 0')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning rate. Default: 0.001')

    cargs = parser.parse_args(argv)

    # general setting
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    ## dataset settings
    args.dataid = cargs.dataid
    args.tokenizer = "space"
    args.max_sent_length = 50
    args.convert_to_lower_letter = False
    args.min_frequent_vocab_times = 10
    args.min_rare_vocab_times = 0
    args.wvid = cargs.wvid

    ## training settings
    args.epochs = cargs.epoch
    args.lr = cargs.lr
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    ## arguments for restoring checkpoints
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    ## architecture settings
    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.dh_size = cargs.dh_size
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm

    ## decoding settings
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.temperature = cargs.temperature

    ## random seed: seed every RNG the model code may draw from
    args.seed = cargs.seed
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    from main import main

    main(args, load_exclude_set, restoreCallback)
def run(*argv):
    """Parse command-line style arguments and start the classification model.

    The arguments are parsed with ``argparse``, copied (together with a set
    of fixed hyper-parameters) into a ``Storage`` object and handed to
    ``main.main``.

    Args:
        *argv: Argument strings as they would appear on the command line,
            without the program name. Because an (possibly empty) tuple is
            always passed to ``parse_args``, ``sys.argv`` is never consulted;
            calling ``run()`` uses every default.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid arguments or ``--help``.
    """
    import argparse
    import random
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A classification model with GRU encoder and MLP for prediction. '
                    'Dropout and batchnorm is supported.')
    args = Storage()

    parser.add_argument(
        '--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
             'Default: runXXXXXX_XXXXXX (initialized by current time)')
    parser.add_argument(
        '--restore', type=str, default=None,
        help='Checkpoints name to load. '
             '"NAME_last" for the last checkpoint of model named NAME. '
             '"NAME_best" means the best checkpoint. '
             'You can also use "last" and "best", by default use last model you run. '
             'Attention: "NAME_last" and "NAME_best" are not guaranteed to work '
             'when 2 models with same name run in the same time. '
             '"last" and "best" are not guaranteed to work when 2 models run in the same time. '
             "Default: None (don't load anything)")
    parser.add_argument(
        '--mode', type=str, default="train",
        help='"train" or "test". Default: train')

    parser.add_argument('--eh_size', type=int, default=200,
                        help='Size of encoder GRU')
    parser.add_argument('--class_num', type=int, default=5,
                        help='Number of classes')
    parser.add_argument(
        '--droprate', type=float, default=0,
        help="The probability to be zeroed in dropout. "
             "0 indicates for don't use dropout")
    parser.add_argument('--batchnorm', action='store_true',
                        help='Use batchnorm')

    parser.add_argument('--dataset', type=str, default='SST',
                        help='Dataloader class. Default: SST')
    parser.add_argument('--datapath', type=str, default='resources://SST',
                        help='Directory for data set. Default: resources://SST')
    parser.add_argument('--epoch', type=int, default=100,
                        help="Epoch for training. Default: 100")
    parser.add_argument(
        '--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. "
             "Default: Glove")
    parser.add_argument(
        '--wvpath', type=str, default="resources://Glove300d",
        help="Directory for pretrained wordvector. "
             "Default: resources://Glove300d")

    parser.add_argument(
        '--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir', type=str, default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir', type=str, default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    parser.add_argument('--debug', action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache', action='store_true',
        help='Use cache for speeding up load data and wordvec. '
             '(It may cause problems when you switch dataset.)')
    # The seed used to be hard-coded to 0; the default keeps that behaviour.
    parser.add_argument('--seed', type=int, default=0,
                        help='Specify random seed. Default: 0')

    cargs = parser.parse_args(argv)

    # General settings taken from the command line.
    # Edit the right-hand sides to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.datapath
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvpath
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu

    # Checkpoint-restoring options; these have no command-line flag.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    # Model and training settings: a mix of fixed values and values
    # forwarded from the command line.
    args.batch_per_epoch = 500
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.class_num = cargs.class_num
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm
    args.lr = 1e-3
    args.batch_size = 64
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    # Seed Python's RNG before handing control to the model code.
    args.seed = cargs.seed
    random.seed(cargs.seed)

    from main import main

    main(args, load_exclude_set, restoreCallback)
args.cuda = not cargs.cpu args.disentangle = cargs.disentangle args.droprate = cargs.droprate args.hist_len = cargs.hist_len args.hist_weights = cargs.hist_weights if args.hist_len != len(args.hist_weights): raise ValueError('the hist_len should be equal to the length of weights') args.hist_weights = np.array(args.hist_weights) / sum(args.hist_weights) # The following arguments are not controlled by command line. args.restore_optimizer = False args.load_exclude_set = [] args.restoreCallback = None args.batch_num_per_gradient = 1 args.embedding_size = 300 args.eh_size = 200 args.dh_size = 400 args.lr = 5e-4 args.batch_size = 8 args.grad_clip = 5 args.show_sample = [0] # show which batch when evaluating at tensotboard args.checkpoint_steps = 3 args.checkpoint_max_to_keep = 3 args.checkpoint_epoch = 5 random.seed(cargs.seed) np.random.seed(cargs.seed) torch.manual_seed(cargs.seed) random.seed(cargs.seed)
def run(*argv):
    """Parse command-line style arguments and start the seq2seq model.

    This runner supports the ``basic``, ``raml``, ``scheduled-sampling`` and
    ``policy-gradient`` training algorithms selected through ``--model``.
    The arguments are parsed with ``argparse``, copied (together with a set
    of fixed hyper-parameters) into a ``Storage`` object, the ``random``,
    ``torch`` and ``numpy`` RNGs are seeded, and ``main.main`` is called.

    Args:
        *argv: Argument strings as they would appear on the command line,
            without the program name. Because an (possibly empty) tuple is
            always passed to ``parse_args``, ``sys.argv`` is never consulted;
            calling ``run()`` uses every default.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid arguments or ``--help``.
    """
    import argparse
    import random
    import time

    from utils import Storage

    parser = argparse.ArgumentParser(
        description='A seq2seq model with GRU encoder and decoder. '
                    'Attention, beamsearch, dropout and batchnorm is supported. '
                    'It can train using RAML, Scheduled Sampling or '
                    'Policy Gradient algorithms.')
    args = Storage()

    parser.add_argument(
        '--name', type=str, default=None,
        help='The name of your model, used for tensorboard, etc. '
             'Default: runXXXXXX_XXXXXX (initialized by current time)')
    # The help text now spells the choice exactly as argparse accepts it.
    parser.add_argument(
        '--model', type=str, default="basic",
        choices=["basic", "raml", "scheduled-sampling", "policy-gradient"],
        help='The type of algorithm. '
             'Choices: basic, raml, scheduled-sampling, policy-gradient. '
             'Default: basic Seq2seq')
    parser.add_argument(
        '--restore', type=str, default=None,
        help='Checkpoints name to load. '
             '"NAME_last" for the last checkpoint of model named NAME. '
             '"NAME_best" means the best checkpoint. '
             'You can also use "last" and "best", by default use last model you run. '
             'It can also be an url started with "http". '
             'Attention: "NAME_last" and "NAME_best" are not guaranteed to work '
             'when 2 models with same name run in the same time. '
             '"last" and "best" are not guaranteed to work when 2 models run in the same time. '
             "Default: None (don't load anything)")
    parser.add_argument(
        '--mode', type=str, default="train",
        help='"train" or "test". Default: train')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning rate. Default: 0.001')

    parser.add_argument('--eh_size', type=int, default=200,
                        help='Size of encoder GRU')
    parser.add_argument('--dh_size', type=int, default=200,
                        help='Size of decoder GRU')
    parser.add_argument(
        '--droprate', type=float, default=0,
        help="The probability to be zeroed in dropout. "
             "0 indicates for don't use dropout")
    parser.add_argument('--batchnorm', action='store_true',
                        help='Use batchnorm')
    parser.add_argument(
        '--decode_mode', type=str,
        choices=['max', 'sample', 'gumbel', 'samplek', 'beam'],
        default='beam',
        help='The decode strategy when freerun. '
             'Choices: max, sample, gumbel(=sample), '
             'samplek(sample from topk), beam(beamsearch). Default: beam')
    parser.add_argument(
        '--top_k', type=int, default=10,
        help='The top_k when decode_mode == "beam" or "samplek"')
    parser.add_argument(
        '--length_penalty', type=float, default=0.7,
        help='The beamsearch penalty for short sentences. '
             'The penalty will get larger when this becomes smaller.')

    parser.add_argument('--dataset', type=str, default='OpenSubtitles',
                        help='Dataloader class. Default: OpenSubtitles')
    parser.add_argument(
        '--dataid', type=str,
        default='resources://OpenSubtitles#OpenSubtitles',
        help='Resource id for data set. '
             'It can be a resource name or a local path. '
             'Default: resources://OpenSubtitles#OpenSubtitles')
    parser.add_argument('--epoch', type=int, default=100,
                        help="Epoch for training. Default: 100")
    parser.add_argument('--batch_per_epoch', type=int, default=1500,
                        help="Batches per epoch. Default: 1500")
    parser.add_argument(
        '--wvclass', type=str, default='Glove',
        help="Wordvector class, none for not using pretrained wordvec. "
             "Default: Glove")
    parser.add_argument(
        '--wvid', type=str, default="resources://Glove300d",
        help="Resource id for pretrained wordvector. "
             "Default: resources://Glove300d")

    parser.add_argument(
        '--out_dir', type=str, default="./output",
        help='Output directory for test output. Default: ./output')
    parser.add_argument(
        '--log_dir', type=str, default="./tensorboard",
        help='Log directory for tensorboard. Default: ./tensorboard')
    parser.add_argument(
        '--model_dir', type=str, default="./model",
        help='Checkpoints directory for model. Default: ./model')
    parser.add_argument(
        '--cache_dir', type=str, default="./cache",
        help='Checkpoints directory for cache. Default: ./cache')
    parser.add_argument('--cpu', action="store_true", help='Use cpu.')
    # This help used to be a copy of the --cpu text.
    # NOTE(review): presumably a GPU index consumed by main — confirm.
    parser.add_argument('--device', type=int, default=0,
                        help='Device index to use. Default: 0')
    parser.add_argument('--debug', action='store_true',
                        help='Enter debug mode (using ptvsd).')
    parser.add_argument(
        '--cache', action='store_true',
        help='Use cache for speeding up load data and wordvec. '
             '(It may cause problems when you switch dataset.)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Specify random seed. Default: 0')

    # RAML parameters
    parser.add_argument('--raml_file', type=str,
                        default='samples_iwslt14.txt',
                        help='the samples and rewards described in RAML')
    parser.add_argument('--n_samples', type=int, default=10,
                        help='number of samples for every target sentence')
    parser.add_argument('--tau', type=float, default=0.4,
                        help='the temperature in RAML algorithm')

    # Scheduled sampling parameters
    parser.add_argument(
        '--decay_factor', type=float, default=500.,
        help='The hyperparameter controlling the speed of increasing '
             'the probability of sampling from model. Default: 500.')

    # Policy Gradient parameters
    parser.add_argument(
        '--epoch_teacherForcing', type=int, default=10,
        help='How long to run teacherForcing before running policy gradient. '
             'Default: 10')
    parser.add_argument(
        '--nb_sample_training', type=int, default=20,
        help='How many samples we take for each batch during policy gradient. '
             'Default: 20')
    parser.add_argument(
        '--policy_gradient_reward_mode', type=str, default='mean',
        help='How the policy gradient is applied. Default: mean')

    cargs = parser.parse_args(argv)

    # General settings taken from the command line.
    # Edit the right-hand sides to bypass the command line.
    args.name = cargs.name or time.strftime("run%Y%m%d_%H%M%S",
                                            time.localtime())
    args.model = cargs.model
    args.restore = cargs.restore
    args.mode = cargs.mode
    args.dataset = cargs.dataset
    args.datapath = cargs.dataid  # stored under the name "datapath"
    args.epochs = cargs.epoch
    args.wvclass = cargs.wvclass
    args.wvpath = cargs.wvid  # stored under the name "wvpath"
    args.out_dir = cargs.out_dir
    args.log_dir = cargs.log_dir
    args.model_dir = cargs.model_dir
    args.cache_dir = cargs.cache_dir
    args.debug = cargs.debug
    args.cache = cargs.cache
    args.cuda = not cargs.cpu
    args.device = cargs.device

    # RAML parameters
    args.raml_file = cargs.raml_file
    args.n_samples = cargs.n_samples
    args.tau = cargs.tau

    # Scheduled sampling parameters
    args.decay_factor = cargs.decay_factor

    # Policy Gradient parameters
    # How long to run teacherForcing before running policy gradient
    args.epoch_teacherForcing = cargs.epoch_teacherForcing
    # How many samples we take for each batch during policy gradient
    args.nb_sample_training = cargs.nb_sample_training
    # How the policy gradient is applied
    args.policy_gradient_reward_mode = cargs.policy_gradient_reward_mode

    # Checkpoint-restoring options; these have no command-line flag.
    args.restore_optimizer = True
    load_exclude_set = []
    restoreCallback = None

    # Model and training settings: a mix of fixed values and values
    # forwarded from the command line.
    args.batch_per_epoch = cargs.batch_per_epoch
    args.embedding_size = 300
    args.eh_size = cargs.eh_size
    args.dh_size = cargs.dh_size
    args.decode_mode = cargs.decode_mode
    args.top_k = cargs.top_k
    args.length_penalty = cargs.length_penalty
    args.droprate = cargs.droprate
    args.batchnorm = cargs.batchnorm
    args.lr = cargs.lr
    # RAML ties the batch size to the number of samples per target sentence.
    args.batch_size = 3 * args.n_samples if args.model == "raml" else 32
    args.batch_num_per_gradient = 4
    args.grad_clip = 5
    args.show_sample = [0]  # show which batch when evaluating at tensorboard
    args.max_sent_length = 50
    args.checkpoint_steps = 20
    args.checkpoint_max_to_keep = 5

    # Seed every RNG the model code may draw from.
    args.seed = cargs.seed
    random.seed(cargs.seed)
    import torch
    torch.manual_seed(cargs.seed)
    import numpy as np
    np.random.seed(cargs.seed)

    from main import main

    main(args, load_exclude_set, restoreCallback)