Example #1
lm_config_registry.set_root_config({
    # data
    'data_path': 'onebillionword/',
    'dict_path': "",
    'vocab_path': "",
    'dict_vocab_path': "",
    'layout': 'standard',
    'num_input_words': 10000,
    'def_num_input_words': 0,  # 0 => fall back to num_input_words
    'num_output_words': 10000,
    'max_length': 100,
    'batch_size': 64,
    'batch_size_valid': 64,
    'max_def_length': 100,
    'max_def_per_word': 1000,
    'exclude_top_k': 0,

    # model
    'emb_dim': 500,
    'emb_def_dim': 500,
    'dim': 500,
    'compose_type': 'sum',
    'disregard_word_embeddings': False,
    'learning_rate': 0.001,
    'momentum': 0.9,
    'grad_clip_threshold': 5.0,

    # embeddings
    'embedding_path': '',

    # model: def_reader
    'def_reader': 'LSTM',
    'standalone_def_rnn': False,
    'standalone_def_lookup': False,
    'cache_size': 0,  # when 0: no cache

    # monitoring and checkpointing
    'mon_freq_train': 200,
    'mon_freq_valid': 2000,
    'save_freq_batches': 2000,
    'checkpoint_every_n_batches': 100000,
    'very_rare_threshold': [1000, 100, 10],
    'n_batches': 0,
    'monitor_parameters': False,
    'fast_checkpoint': False
})
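
The def_num_input_words comment above means that a value of 0 falls back to num_input_words. A minimal sketch of how a consumer of the config could resolve that fallback (the variable names here are illustrative, not taken from the repository):

# Hypothetical fallback resolution for def_num_input_words == 0.
config = {
    'num_input_words': 10000,
    'def_num_input_words': 0,  # 0 => fall back to num_input_words
}

def_num_input_words = config['def_num_input_words'] or config['num_input_words']
assert def_num_input_words == 10000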
Example #2
nli_esim_config_registry.set_root_config({
    'data_path': 'snli',
    'layout': 'snli',

    # Lookup params
    'max_def_per_word': 100000,
    'emb_dim': 300,
    'bn': 0,
    'dim': 300,
    'dict_path': '',
    'vocab': '',
    'vocab_text': '',  # Defaults to vocab. Use when original vocab cannot be used for frequency in dict
    'encoder': 'bilstm',

    # Also used in NYU-MLI
    'embedding_path': '',
    'train_emb': 1,
    'train_def_emb': 1,

    # Dict params
    'vocab_def': '',
    'compose_type': '',
    'try_lowercase': True,
    'disregard_word_embeddings': False,
    'exclude_top_k': -1,
    'max_def_length': 50,
    'embedding_def_path': '',
    'def_dim': 100,
    'combiner_reader_translate': False,
    'def_emb_translate_dim': -1,
    'def_emb_dim': -1,
    "combiner_dropout": 0.0,
    'num_input_def_words': 0,
    "combiner_dropout_type": "per_unit",
    'with_too_long_defs': 'drop',
    "combiner_gating": "none",
    "combiner_shortcut": False,
    'reader_type': 'mean',
    'share_def_lookup': False,
    'combiner_bn': False,
    'num_input_words': -1,  # Will take vocab size
    "dropout": 0.5,
    'batch_size': 32,
    'lr': 0.0004,

    # Misc. Monitor every 100% of epoch
    'monitor_parameters': 0,
    'mon_freq': int((500000) / 32) / 2,  # 2 times per epoch
    'save_freq_epochs': 1,
    'mon_freq_valid': int((500000) / 32) / 2,
    'n_batches': 150 * (500000 / 32)  # ~50 epochs of SNLI
})
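
Note that int((500000) / 32) / 2 evaluates to the float 7812.5 under Python 3, because the outer division is true division. If the training loop expects an integer monitoring frequency, a small sketch with floor division (assuming the 500000 examples per epoch used in the expression above) would be:

EXAMPLES_PER_EPOCH = 500000  # value used in the mon_freq expressions above
BATCH_SIZE = 32

batches_per_epoch = EXAMPLES_PER_EPOCH // BATCH_SIZE  # 15625
mon_freq = batches_per_epoch // 2                     # 7812 -> roughly twice per epoch
print(mon_freq)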
Example #3
configs_ae.set_root_config({
    # data_path: better not to set this and to use FUEL_DATA_PATH instead,
    # so that we can keep identical configs for different dictionaries
    'data_path': '', 
    # the following param was useful to run a baseline without an encoder;
    # it would be similar to word2vec with only one target word (the defined word).
    # this is NOT the baseline in the paper, it is weaker than word2vec
    'vocab_keys_path': '',
    'layout' : 'dict', # don't change. TODO remove this option
    # num_input_words can be set lower than the number of lines in vocab.txt;
    # this replaces rare words with UNK (for example, if set to 10000, all the
    # words from line 10000 on will be replaced by the UNK token)
    'num_input_words' : 10000,
    # same for num_output_words: the loss will ignore words ranked above this value
    'num_output_words': 10000,
    # max definition length
    'max_length' : 100,
    'batch_size' : 32,
    'batch_size_valid' : 32,

    # model
    'encoder': 'lstm', # experimental code with bilstm variants (see seq2seq.py)
    'decoder': 'skip-gram', # do not change?
    # You should use emb_dim = dim unless you're playing with more experimental
    # code.
    'emb_dim' : 300, 
    'dim' : 300,
    # Optimizer is adam.
    'learning_rate' : 0.0003,
    'momentum' : 0.9,
    'grad_clip_threshold' : 5.0,
    'shared_rnn': False, # only relevant when using an LSTM encoder and an LSTM decoder
    # the translate layer is an optional linear layer that transforms
    # the last hidden state of the encoder to be the definition embedding
    'translate_layer': 'linear', 
    'word_dropout': 0.0, # probability of replacing a word with UNK in defs
    'tied_in_out': False, 
    'reconstruction_coef': 1, # You can set this to 0 to recover Hill's model

    # Load pretrained encoder embeddings
    # it's one of the 2 files (.txt) produced by "pack_glove_update_vocab.py"
    'vocab_path': "", 
    # it's one of the 2 files (.npy) produced by "pack_glove_update_vocab.py"
    'embedding_path': '',
    'freeze_pretrained': False, # set to True when using pretrained embeddings
    'provide_targets': False, # TODO remove from code

    # Consistency penalty
    'proximity_coef': 1, # the lambda coefficient for the consistency penalty term
    'proximity_distance': 'l2', # distance: 'l1', 'l2', or 'cos'

    # monitoring and checkpointing
    # the frequency in terms of batch for monitoring train/valid losses
    'mon_freq_train' : 200,
    # in the "full dictionary" setting, there is no validation
    'mon_freq_valid' : 0, # no validation
    'n_valid_early': 0, # no validation
    'save_freq_batches' : 0,
    'checkpoint_every_n_batches': 0,
    'checkpoint_every_n_epochs': 5,
    'n_epochs' : 50,
    'monitor_parameters' : False,
    'fast_checkpoint' : True,
    'seed': 1
})
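
As the comments above explain, num_input_words caps the input vocabulary so that rarer words are replaced by UNK. A small self-contained sketch of that mapping (the UNK id and the word ids are illustrative, not the repository's actual vocabulary handling):

NUM_INPUT_WORDS = 10000
UNK_ID = 0  # assumption: the real UNK id depends on the vocabulary file

def clip_to_vocab(word_ids):
    """Replace ids of words ranked at or beyond NUM_INPUT_WORDS with UNK."""
    return [wid if wid < NUM_INPUT_WORDS else UNK_ID for wid in word_ids]

print(clip_to_vocab([5, 9999, 10000, 123456]))  # [5, 9999, 0, 0]
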
qa_config_registry.set_root_config({
    # data
    'data_path': "",
    'dict_path': "",
    'vocab_path': "",
    'dict_vocab_path': "",
    'embedding_path': "",
    'layout': 'standard',
    'num_input_words': 10000,
    'def_num_input_words': 0,
    'max_length': 100,
    'batch_size': 32,
    'batch_size_valid': 32,

    # retrieval hacks
    'max_def_length': 1000,
    'with_too_long_defs': 'drop',
    'max_def_per_word': 1000,
    'with_too_many_defs': 'random',
    'exclude_top_k': 0,

    # model
    'def_reader': 'LSTMReadDefinitions',
    'dim': 128,
    'emb_dim': 0,
    'readout_dims': [],
    'coattention': True,
    'learning_rate': 0.001,
    'momentum': 0.9,
    'annealing_learning_rate': 0.0001,
    'annealing_start_epoch': 10,
    'grad_clip_threshold': 5.0,
    'emb_dropout': 0,
    'emb_dropout_type': 'regular',
    'dropout': 0.,
    'random_unk': False,
    'def_word_gating': "none",
    'compose_type': "sum",
    'reuse_word_embeddings': False,
    'train_only_def_part': False,

    # monitoring and checkpointing
    'mon_freq_train': 10,
    'save_freq_batches': 1000,
    'save_freq_epochs': 1,
    # that corresponds to about 12 epochs
    'n_batches': 0,
    'n_epochs': 0,
    'monitor_parameters': False
})
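
The "retrieval hacks" keys suggest that definitions longer than max_def_length are dropped (with_too_long_defs = 'drop') and that a random subset is kept when a word has more than max_def_per_word definitions (with_too_many_defs = 'random'). A hedged sketch of such filtering; the actual retrieval code may differ:

import random

MAX_DEF_LENGTH = 1000
MAX_DEF_PER_WORD = 1000

def filter_definitions(definitions, rng=random):
    """definitions: a list of token lists retrieved for a single word."""
    # with_too_long_defs == 'drop': discard definitions that are too long
    kept = [d for d in definitions if len(d) <= MAX_DEF_LENGTH]
    # with_too_many_defs == 'random': keep a random subset if there are too many
    if len(kept) > MAX_DEF_PER_WORD:
        kept = rng.sample(kept, MAX_DEF_PER_WORD)
    return kept
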
snli_config_registry.set_root_config({
    'data_path': 'snli/',
    'layout': 'snli',

    # Lookup params
    'translate_dim': 300,
    'max_def_per_word': 100000,
    'bn': True,
    'mlp_dim': 600,
    'emb_dim': 300,  # Used for def and word lookup
    'dict_path': '',

    # By default no pretrained embeddings are used; our goal ATM is to beat random init
    'embedding_path': '',  # /data/lisa/exp/jastrzes/dict_based_learning/data/snli/glove.840B.300d.npy
    'vocab_def': '',
    'vocab_text': '',  # If passed, will be used for exclude_top_k in Retrieval only
    'vocab': '',
    'def_dim': 300,  # LSTM reader hidden state or translate in MeanPool
    'def_emb_dim': -1,  # Dimensionality of vectors used in definitions
    'compose_type': '',
    'disregard_word_embeddings': False,
    'exclude_top_k': -1,
    'max_def_length': 50,
    'with_too_long_defs': 'drop',
    'train_emb': 1,  # By default no pretrained embeddings are used; our goal ATM is to beat random init
    "combiner_dropout": 0.0,
    "combiner_dropout_type": "per_unit",
    "combiner_gating": "none",
    "combiner_reader_translate": True,
    "combiner_shortcut": False,
    'reader_type': 'rnn',
    'share_def_lookup': False,
    'combiner_bn': False,
    'num_input_words': 0,  # Will take vocab size
    'num_input_def_words': 0,  # Will take vocab size
    "encoder": "sum",
    "dropout": 0.3,
    'batch_size': 512,
    'lr': 0.001,
    'l2': 4e-6,

    # Misc
    'monitor_parameters': 0,
    'mon_freq': 1000,
    'save_freq_epochs': 1,
    'mon_freq_valid': 1000,
    'n_batches': 200000  # ~200 epochs of SNLI with batch size 500 (!)
})
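
exclude_top_k appears in several of these configs with -1 or 0 apparently meaning "disabled"; judging by the name and the vocab_text comment, it seems to skip dictionary retrieval for the top-k most frequent words. A sketch of that check, assuming a frequency-sorted vocabulary (the threshold value below is hypothetical):

def should_retrieve(word_rank, exclude_top_k):
    """word_rank: 0-based rank in a frequency-sorted vocabulary."""
    if exclude_top_k <= 0:  # -1 or 0: never skip retrieval
        return True
    return word_rank >= exclude_top_k

print(should_retrieve(10, 1000), should_retrieve(5000, 1000))  # False True
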
lm_config_registry = ConfigRegistry()
lm_config_registry.set_root_config({
    # data
    'data_path': "",
    'dict_path': "",
    'layout': 'standard',
    'num_input_words': 10000,
    'num_output_words': 10000,
    'max_length': 100,
    'batch_size': 32,
    'batch_size_valid': 32,
    'max_def_length': 1000,
    'exclude_top_k': -1,

    # model
    'dim': 128,
    'compose_type': 'sum',
    'standalone_def_rnn': True,
    'disregard_word_embeddings': False,
    'learning_rate': 0.001,
    'momentum': 0.9,
    'grad_clip_threshold': 5.0,

    # monitoring and checkpointing
    'mon_freq_train': 10,
    'mon_freq_valid': 1000,
    'save_freq_batches': 1000,
    'n_batches': 0,
    'monitor_parameters': False
})
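
All of the examples above follow the same pattern: a registry object receives a dictionary of default hyperparameters as its root config. The repository's ConfigRegistry implementation is not shown here; the following is only a minimal sketch of what such a class could look like, with named configs stored as overrides on top of the root (the set_config method and the 'root' name are assumptions):

import copy

class ConfigRegistry(object):
    """Minimal sketch: a root config plus named configs stored as overrides."""

    def __init__(self):
        self._root = None
        self._changes = {}

    def set_root_config(self, config):
        self._root = dict(config)

    def set_config(self, name, changes):  # assumed helper for derived configs
        self._changes[name] = dict(changes)

    def __getitem__(self, name):
        config = copy.deepcopy(self._root)
        if name != 'root':
            config.update(self._changes[name])
        return config

registry = ConfigRegistry()
registry.set_root_config({'dim': 128, 'lr': 0.001})
registry.set_config('big', {'dim': 512})              # hypothetical derived config
print(registry['big']['dim'], registry['big']['lr'])  # 512 0.001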