Example No. 1
def validate_input_args():
    global model_path
    validate((args.b, int, 1, 100000000))
    validate((args.e, int, 1, 100000000))
    validate((args.sentence_length, int, 1, 10000))
    validate((args.token_emb_size, int, 1, 10000))
    validate((args.intent_hidden_size, int, 1, 10000))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.save_epochs, int, 1, 1000))
    validate((args.tagger_dropout, float, 0, 1))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_path))
    validate_parent_exists(model_path)
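
Note: the validate calls above follow a (value, expected_type, lower_bound, upper_bound) tuple convention. The real helper ships with the surrounding library and is not shown in these excerpts; the following is only a minimal stand-in sketch of that convention, with the string case assumed to bound the length.

def _validate_sketch(*specs):
    # Hypothetical stand-in for the library's validate(); each spec is
    # assumed to be (value, expected_type, lower, upper).
    for value, expected_type, lower, upper in specs:
        if not isinstance(value, expected_type):
            raise TypeError('expected {}, got {}'.format(
                expected_type.__name__, type(value).__name__))
        measure = len(value) if isinstance(value, str) else value
        if not lower <= measure <= upper:
            raise ValueError('{!r} outside allowed range [{}, {}]'.format(
                value, lower, upper))

For example, _validate_sketch((0.3, float, 0, 1)) passes silently, while _validate_sketch((1.5, float, 0, 1)) raises ValueError.
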
Example No. 2
def validate_input_args():
    global model_path
    validate((args.b, int, 1, 100000000))
    validate((args.e, int, 1, 100000000))
    validate((args.sentence_length, int, 1, 10000))
    validate((args.token_emb_size, int, 1, 10000))
    validate((args.intent_hidden_size, int, 1, 10000))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.save_epochs, int, 1, 1000))
    validate((args.tagger_dropout, float, 0, 1))
    model_path = path.join(path.dirname(path.realpath(__file__)),
                           str(args.model_path))
    validate_parent_exists(model_path)
Example No. 3
def validate_input_args():
    global model_path, settings_path
    validate((args.sentence_len, int, 1, 1000))
    validate((args.lstm_depth, int, 1, 10))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.token_embedding_size, int, 1, 10000))
    validate((args.pos_embedding_size, int, 1, 1000))
    validate((args.vocab_size, int, 1, 100000000))
    validate((args.char_hidden_size, int, 1, 1000))
    validate((args.max_char_word_length, int, 1, 100))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_name))
    settings_path = path.join(path.dirname(path.realpath(__file__)), str(args.settings))
    validate_parent_exists(model_path)
    validate_parent_exists(settings_path)
Example No. 4
def validate_input_args(args):
    validate((args.b, int, 1, 100000))
    validate((args.e, int, 1, 100000))
    validate((args.tag_num, int, 1, 1000))
    validate((args.sentence_length, int, 1, 10000))
    validate((args.word_length, int, 1, 100))
    validate((args.word_embedding_dims, int, 1, 10000))
    validate((args.character_embedding_dims, int, 1, 1000))
    validate((args.char_features_lstm_dims, int, 1, 10000))
    validate((args.entity_tagger_lstm_dims, int, 1, 10000))
    validate((args.dropout, float, 0, 1))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_path))
    validate_parent_exists(model_path)
    model_info_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_info_path))
    validate_parent_exists(model_info_path)
Example No. 5
def validate_input_args():
    global model_path, settings_path
    validate((args.sentence_len, int, 1, 1000))
    validate((args.lstm_depth, int, 1, 10))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.token_embedding_size, int, 1, 10000))
    validate((args.pos_embedding_size, int, 1, 1000))
    validate((args.vocab_size, int, 1, 100000000))
    validate((args.char_hidden_size, int, 1, 1000))
    validate((args.max_char_word_length, int, 1, 100))
    model_path = path.join(path.dirname(path.realpath(__file__)),
                           str(args.model_name))
    settings_path = path.join(path.dirname(path.realpath(__file__)),
                              str(args.settings))
    validate_parent_exists(model_path)
    validate_parent_exists(settings_path)
Example No. 6
def validate_parent_exists(file_path):
    """Validates parent directory exists in case the file_path is not None"""
    if file_path is not None and file_path:
        io.validate_parent_exists(fix_path(file_path))
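
The io.validate_parent_exists call delegated to above is not defined in this excerpt; conceptually it only has to confirm that the directory portion of the path exists. A minimal, hypothetical version of that check:

import os


def parent_dir_exists(file_path):
    # Hypothetical helper: True when the directory that would contain
    # file_path already exists; the path is made absolute first so a bare
    # filename resolves against the current working directory.
    parent = os.path.dirname(os.path.abspath(file_path))
    return os.path.isdir(parent)
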
Example No. 7
tf.flags.DEFINE_boolean("use_match_type", False, "use match type features")
tf.flags.DEFINE_boolean("cache_match_type", False, "cache match type answers")
tf.flags.DEFINE_boolean("cache_vectorized", False, "cache vectorized data")
tf.flags.DEFINE_boolean("use_oov", False, "use OOV test set")
tf.flags.DEFINE_string("data_dir", "data/", "File to save model weights to.")
tf.flags.DEFINE_string("weights_save_path", "saved_tf/",
                       "File to save model weights to.")
FLAGS = tf.flags.FLAGS

validate((FLAGS.task, int, 1, 7), (FLAGS.nhops, int, 1, 100),
         (FLAGS.emb_size, int, 1, 10000))

# Validate inputs
current_dir = os.path.dirname(os.path.realpath(__file__))
weights_save_path = os.path.join(current_dir, FLAGS.weights_save_path)
validate_parent_exists(weights_save_path)
data_dir = os.path.join(current_dir, FLAGS.data_dir)
validate_parent_exists(data_dir)

babi = BABI_Dialog(
    path=data_dir,
    task=FLAGS.task,
    oov=FLAGS.use_oov,
    use_match_type=FLAGS.use_match_type,
    cache_match_type=FLAGS.cache_match_type,
    cache_vectorized=FLAGS.cache_vectorized,
)

with tf.Session() as sess:
    memn2n = MemN2N_Dialog(
        32,
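
A recurring idiom in the examples above is resolving user-supplied relative paths against the script's own location (rather than the current working directory) before validating them, so the check does not depend on where the script is launched from. A self-contained illustration of just that idiom (the file name is hypothetical):

import os

script_dir = os.path.dirname(os.path.realpath(__file__))
weights_save_path = os.path.join(script_dir, 'saved_tf/')  # hypothetical value
print(weights_save_path)  # absolute path next to this script, regardless of CWD
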
Example No. 8
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=validate_existing_filepath, default='./',
                        help='file_path where the files to parse are located')
    parser.add_argument('--data_type', type=str, default='amazon',
                        choices=['amazon'])
    parser.add_argument('--output_file', type=validate_parent_exists, default='./opt_trials.pkl',
                        help='file_path where the output of the trials will be located')
    parser.add_argument('--new_trials', type=int, default=20, action=check_size(1, 20000))
    args_in = parser.parse_args()

    # Check inputs
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)
    if args_in.output_file:
        validate_parent_exists(args_in.output_file)

    if args_in.data_type == 'amazon':
        data_in = Amazon_Reviews(args_in.file_path)

    try:
        if args_in.output_file.endswith('.pkl'):
            with open(args_in.output_file, 'rb') as read_f:
                trials_to_keep = pickle.load(read_f)
            print("Utilizing existing trial files")
        else:
            trials_to_keep = Trials()
    # If the file does not already exist we will start with a new set of trials
    except FileNotFoundError:
        trials_to_keep = Trials()
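
check_size(1, 20000) is passed as an argparse action above, which implies a small factory returning an argparse.Action subclass that enforces numeric bounds at parse time. Its actual definition is not part of this excerpt; a plausible minimal sketch:

import argparse


def check_size(lower, upper):
    # Hypothetical sketch of the bounds-checking action factory used above.
    class CheckSize(argparse.Action):
        def __call__(self, parser, namespace, values, option_string=None):
            if not lower <= values <= upper:
                parser.error('{} must be in [{}, {}], got {}'.format(
                    option_string or self.dest, lower, upper, values))
            setattr(namespace, self.dest, values)
    return CheckSize
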
Example No. 9
    model.save(model_path)


if __name__ == '__main__':
    # read input args and validate
    parser = create_argument_parser()
    args = parser.parse_args()
    validate((args.sentence_length, int, 1, 1000))
    validate((args.feature_size, int, 1, 10000))
    validate((args.b, int, 1, 100000))
    validate((args.e, int, 1, 100000))
    model_path = path.join(path.dirname(path.realpath(__file__)),
                           '{}.h5'.format(str(args.model_name)))
    settings_path = path.join(path.dirname(path.realpath(__file__)),
                              '{}.params'.format(str(args.model_name)))
    validate_parent_exists(model_path)

    # load dataset and get tokens/chunks/pos tags
    dataset = CONLL2000(data_path=args.data_dir,
                        sentence_length=args.sentence_length,
                        extract_chars=args.char_features,
                        max_word_length=args.max_word_length)
    train_set = dataset.train_set
    test_set = dataset.test_set
    words_train, pos_train, chunk_train = train_set[:3]
    words_test, pos_test, chunk_test = test_set[:3]

    # get label sizes, transform y's into 1-hot encoding
    chunk_labels = len(dataset.chunk_vocab) + 1
    pos_labels = len(dataset.pos_vocab) + 1
    word_vocab_size = len(dataset.word_vocab) + 2
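
The "transform y's into 1-hot encoding" step mentioned in the comment above is not shown in this excerpt; with plain NumPy it can be done as follows (label count and values are illustrative):

import numpy as np

num_labels = 5                      # illustrative number of classes
y = np.array([0, 2, 4, 1])          # integer class ids
y_one_hot = np.eye(num_labels)[y]   # shape (4, 5), a single 1.0 per row
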
Example No. 10
    '--double_dict',
    type=str,
    default='3',
    help='specifies whether to use a second dictionary for words within ' +
    'specified extended window. ie for "-w 1 -d 2", the ' +
    'sentence "hello world how are things" creates a window of "2:hello ' +
    '1:world <NULL> 1:are 2:things"')
parser.add_argument('-t',
                    '--num_threads',
                    type=int,
                    default=4,
                    help='number of threads to use',
                    action=check_size(1, 10))
args = vars(parser.parse_args())

validate_parent_exists(args['data_dir'])
if args['entities']:
    validate_parent_exists(args['entities'])
validate((args['window_size'], str, 1, 100),
         (args['double_dict'], str, 1, 100))

beg = time.time()

if args['data_dir']:
    # also set the entities and input file here
    args['entities'] = os.path.expanduser(
        args['data_dir'] + '/movieqa/knowledge_source/entities.txt')
    args['input_file'] = [
        os.path.expanduser(args['data_dir'] +
                           '/movieqa/knowledge_source/wiki.txt')
    ]
Example No. 11
                    help='default behavior merges lines from the same example--set this flag ' +
                    'to disable and only consider windows from the same line in the file')
parser.add_argument('-w', '--window_size', type=str, default='0',
                    help='sizes of windows PER SIDE around words to generate. eg 1 or ' +
                    '1,2,3. ie "-w 1" for "hey world hey" produces "hey <NULL> hey"')
parser.add_argument('-d', '--double_dict', type=str, default='3',
                    help='specifies whether to use a second dictionary for words within ' +
                    'specified extended window. ie for "-w 1 -d 2", the ' +
                    'sentence "hello world how are things" creates a window of "2:hello ' +
                    '1:world <NULL> 1:are 2:things"')
parser.add_argument('-t', '--num_threads', type=int, default=4,
                    help='number of threads to use',
                    action=check_size(1, 10))
args = vars(parser.parse_args())

validate_parent_exists(args['data_dir'])
if args['entities']:
    validate_parent_exists(args['entities'])
validate((args['window_size'], str, 1, 100), (args['double_dict'], str, 1, 100))

beg = time.time()

if args['data_dir']:
    # also set the entities and input file here
    args['entities'] = os.path.expanduser(args['data_dir'] +
                                          '/movieqa/knowledge_source/entities.txt')
    args['input_file'] = [os.path.expanduser(args['data_dir'] +
                                             '/movieqa/knowledge_source/wiki.txt')]
else:
    ValueError("No data_dir given.")
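
The windowing behavior described in the -w help text ("-w 1" turning "hey world hey" into "hey <NULL> hey") can be reproduced with a few lines; this is only an illustration of the documented behavior, not the script's actual implementation:

def context_window(tokens, idx, size, null_token='<NULL>'):
    # Take `size` tokens on each side of position idx and mask the
    # center word itself, as the help text describes.
    left = tokens[max(0, idx - size):idx]
    right = tokens[idx + 1:idx + 1 + size]
    return left + [null_token] + right


print(' '.join(context_window('hey world hey'.split(), 1, 1)))
# -> hey <NULL> hey
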
Example No. 12
parser.add_argument(
    '--test',
    default=False,
    action='store_true',
    help='evaluate on the test set at the end of training.')

parser.set_defaults(batch_size=32, epochs=200)
args = parser.parse_args()

validate((args.emb_size, int, 1, 10000),
         (args.eps, float, 1e-15, 1e-2),
         (args.lr, float, 1e-8, 10),
         (args.grad_clip_norm, float, 1e-3, 1e5))

# Validate inputs
validate_parent_exists(args.log_file)
log_file = args.log_file
validate_parent_exists(args.weights_save_path)
weights_save_path = args.weights_save_path
validate_parent_exists(args.data_dir)
data_dir = args.data_dir
assert weights_save_path.endswith('.npz')
assert log_file.endswith('.txt')

gradient_clip_norm = args.grad_clip_norm

babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
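
The two assert statements above only guard file extensions; an equivalent check with explicit exceptions (example values are hypothetical, names reused from the excerpt) would be:

from pathlib import Path

weights_save_path = 'memn2n_weights.npz'   # hypothetical example value
log_file = 'train_log.txt'                 # hypothetical example value

if Path(weights_save_path).suffix != '.npz':
    raise ValueError('weights_save_path must end with .npz')
if Path(log_file).suffix != '.txt':
    raise ValueError('log_file must end with .txt')
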
Example No. 13
                    action="store_true",
                    help="Run Inference with loaded weight")
parser.add_argument('--restore',
                    action="store_true",
                    help="Run the model restoring weights from model_file")
parser.add_argument(
    '--interactive',
    action="store_true",
    help="Run Inference on User-supplied text either after training or \
                    with saved weights")

parser.set_defaults()
args = parser.parse_args()

if args.model_file:
    validate_parent_exists(args.model_file)

if (args.inference is True) and (args.model_file is None):
    print("Need to set --model_file for Inference problem")
    quit()

if args.model_file is not None:
    model_file = os.path.expanduser(args.model_file)
else:
    model_file = None

wikimovies = WIKIMOVIES(args.data_dir,
                        subset=args.subset,
                        reparse=args.reparse,
                        mem_source=args.mem_mode)
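
The inference/model_file dependency handled above with print() and quit() can also be expressed through argparse's own error handling; a brief self-contained sketch of that alternative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--inference', action='store_true')
parser.add_argument('--model_file', type=str)
args = parser.parse_args([])

# Same dependency check as above, routed through parser.error.
if args.inference and args.model_file is None:
    parser.error('--model_file is required when --inference is set')
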
Example No. 14
    action=check_size(1e-100, 1e-2))
parser.add_argument('--model_file',
                    default='memn2n_weights.npz',
                    help='File to load model weights from.',
                    type=str)

parser.set_defaults(batch_size=32, epochs=200)
args = parser.parse_args()

validate((args.emb_size, int, 1, 10000), (args.eps, float, 1e-15, 1e-2))

# Sanitize inputs
validate_existing_filepath(args.model_file)
model_file = args.model_file
assert model_file.endswith('.npz')
validate_parent_exists(args.data_dir)
data_dir = args.data_dir

babi = BABI_Dialog(path=data_dir,
                   task=args.task,
                   oov=args.use_oov,
                   use_match_type=args.use_match_type,
                   cache_match_type=args.cache_match_type,
                   cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size
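
num_iterations above is plain integer division, so any final partial batch is simply dropped for the epoch; for instance (numbers illustrative):

ndata = 1000                           # illustrative training-set size
batch_size = 32
num_iterations = ndata // batch_size   # 31 full batches; the last 8 samples are skipped this epoch
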
Example No. 15
    '--model_file',
    default='memn2n_weights.npz',
    help='File to load model weights from.',
    type=str)

parser.set_defaults(batch_size=32, epochs=200)
args = parser.parse_args()

validate((args.emb_size, int, 1, 10000),
         (args.eps, float, 1e-15, 1e-2))

# Sanitize inputs
validate_existing_filepath(args.model_file)
model_file = args.model_file
assert model_file.endswith('.npz')
validate_parent_exists(args.data_dir)
data_dir = args.data_dir

babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
    cache_match_type=args.cache_match_type,
    cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size