Example #1
def main():

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    params = Params(json_path)

    # Set the logger 
    set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info('Creating the dataset...')
    data_dir = args.data_dir
    valid_data_dir = os.path.join(data_dir, 'valid')
    
    # Get the filenames and labels from the validation set
    valid_filenames, valid_labels = get_filenames_and_labels(
        valid_data_dir, params)

    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(valid_labels))

    # Create the two iterators over the two datasets
    valid_inputs = input_fn(False, valid_filenames,
                            valid_labels, params)

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', valid_inputs, params,
                          reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params,
             args.restore_from)
Example #2
def main():

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), 'No json configuration file found at {}'.format(json_path)
    params = Params(json_path)

    # Set the logger 
    set_logger(os.path.join(args.model_dir, 'train.log'))
    
    if not os.path.exists(args.restore_from):
        os.makedirs(args.restore_from)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    data_dir = args.data_dir
    train_data_dir = os.path.join(data_dir, 'train')
    valid_data_dir = os.path.join(data_dir, 'valid')

    # Get the filenames and labels from the train and valid sets
    train_filenames, train_labels = get_filenames_and_labels(
        train_data_dir, params)
    valid_filenames, valid_labels = get_filenames_and_labels(
        valid_data_dir, params)

    params.train_size = len(train_filenames)
    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(train_labels))

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames,
                            train_labels, params)
    valid_inputs = input_fn(False, valid_filenames,
                            valid_labels, params)

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs,
                                params)
    valid_model_spec = model_fn('eval', valid_inputs,
                                params, reuse=True)
    # Train the model
    logging.info('Starting training for {} epoch(s)'.format(
        params.num_epochs))
    train_and_evaluate(train_model_spec, valid_model_spec,
                       args.model_dir, params, args.restore_from)
Example #3
def train():
    # Set the logger
    set_logger(os.path.join(params['model_dir'], 'train.log'))
    # log params
    logging.info(params)

    # Load vocabulary
    vocab = tf.contrib.lookup.index_table_from_file(vocab_path,
                                                    num_oov_buckets=1)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    train_input_words = load_dataset_from_text(data_dir, train_input_filename,
                                               vocab)
    train_context_words = load_dataset_from_text(data_dir,
                                                 train_context_filename, vocab)

    # Create the iterator over the dataset
    train_inputs = input_fn('train', train_input_words, train_context_words,
                            params)
    eval_inputs = input_fn('eval', train_input_words, train_context_words,
                           params)
    logging.info("- done")

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train',
                                train_inputs,
                                params,
                                reuse=tf.AUTO_REUSE)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info('- done.')

    # Train the model
    logging.info('Starting training for {} epochs'.format(
        params['num_epochs']))
    normalized_embedding_matrix = train_and_evaluate(train_model_spec,
                                                     eval_model_spec, params)

    save_dict_to_json(params, params['model_dir'] + '/params.json')
    pd.DataFrame(normalized_embedding_matrix).to_csv(os.path.join(
        params['model_dir'], 'normalized_embedding_matrix.tsv'),
                                                     index=False,
                                                     header=None,
                                                     sep='\t')
Example #4
def funct(x):
    # Set the random seed for the whole graph

    tf.set_random_seed(230)

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.data_dir, 'predict.log'))

    # Create the input data pipeline
    data_dir = args.data_dir
    test_data_dir = os.path.join(data_dir)

    # Get the filenames from the test set
    test_filenames = [os.path.join(test_data_dir, 'predict.jpg')]
    test_labels = [x]
    # print(test_labels)

    # Specify the size of the evaluation set
    params.eval_size = len(test_filenames)

    # Create the iterator over the dataset
    test_inputs = input_fn(False, test_filenames, test_labels, params)

    # Define the model
    model_spec = model_fn('eval', test_inputs, params, reuse=tf.AUTO_REUSE)

    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #5
    #TODO: check and load if there's the best weights so far
    #     model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))

    #set logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    #train/test split
    train_fpaths, test_fpaths, train_targets, test_targets = \
        get_train_test_split(args.json_path, args.data_dir, train_size=args.train_size)

    params.train_size = len(train_fpaths)
    params.test_size = len(test_fpaths)

    logging.info("Creating the dataset...")
    train_inputs = input_fn(True, train_fpaths, train_targets, params)
    test_inputs = input_fn(False, test_fpaths, test_targets, params)

    logging.info("Creating the model...")
    train_model_spec = model_fn(True, train_inputs, params)
    test_model_spec = model_fn(False, test_inputs, params, reuse=True)

    logging.info("train set predict...")
    predict(train_model_spec, args.model_save_dir, params, args.restore_from)

    logging.info("test set predict...")
    predict(test_model_spec, args.model_save_dir, params, args.restore_from)

    end_time = time.time()
    logging.info("Elapsed training time is {:.2f} secs".format(end_time -
                                                               start_time))
Example #6
from train import import_names_and_labels

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", default="experiments/07_full_images")
parser.add_argument("--data_dir", default="data/kaggle")
parser.add_argument("--restore_from", default="best_weights")
parser.add_argument("--set", default="test")

if __name__ == "__main__":
    tf.set_random_seed(230)
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, "params.json")
    params = Params(json_path)
    params.evaluate()

    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    image_dir = os.path.join(data_dir, "images")
    names, labels = import_names_and_labels(data_dir, "test",
                                            params.num_labels)
    params.eval_size = len(names)
    inputs = input_fn("test", image_dir, names, labels, params)

    logging.info("Creating the model...")
    model_spec = model_fn("eval", inputs, params)

    logging.info("Evaluating...")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #7
def train(param_file):

    # load params from param file
    params = Params(param_file)

    # determine model dir
    if params.model_dir == 'same':  # this allows the model dir to be inferred from params.json file path
        params.model_dir = os.path.dirname(param_file)
    if not os.path.isdir(params.model_dir):
        raise ValueError("Specified model directory does not exist: {}".format(
            params.model_dir))

    # Set the logger, delete old log file if overwrite param is set to yes
    train_dir = os.path.join(params.model_dir, 'train')
    if not os.path.isdir(train_dir):
        os.mkdir(train_dir)
    log_path = os.path.join(train_dir, 'train.log')
    if os.path.isfile(log_path) and params.overwrite:
        os.remove(log_path)
    set_logger(log_path)
    logging.info("Using model directory {}".format(params.model_dir))
    logging.info("Using logging file {}".format(log_path))
    logging.info("Using TensorFlow version {}".format(tf.__version__))

    # Make sure data directory exists
    if not os.path.isdir(params.data_dir):
        raise ValueError("Specified data directory does not exist: {}".format(
            params.data_dir))
    logging.info("Using data directory {}".format(params.data_dir))

    # determine distribution strategy for multi GPU training
    if params.dist_strat.lower() == 'mirrored':
        logging.info("Using Mirrored distribution strategy")
        params.strategy = tf.distribute.MirroredStrategy()
        # adjust batch size and learning rate to compensate for mirrored replicas
        # batch size is multiplied by num replicas
        params.batch_size = params.batch_size * params.strategy.num_replicas_in_sync
        logging.info("Batch size adjusted to {} for {} replicas".format(
            params.batch_size, params.strategy.num_replicas_in_sync))
        # initial learning rate is multiplied by square root of replicas
        # params.learning_rate[0] = params.learning_rate[0] * np.sqrt(params.strategy.num_replicas_in_sync)
        # logging.info(
        #     "Initial learning rate adjusted by a factor of {} (root {} for {} replicas)".format(
        #         np.sqrt(params.strategy.num_replicas_in_sync), params.strategy.num_replicas_in_sync,
        #         params.strategy.num_replicas_in_sync))
    else:
        params.strategy = tf.distribute.get_strategy()

    # Determine checkpoint directories and determine current epoch
    checkpoint_path = os.path.join(params.model_dir, 'checkpoints')
    latest_ckpt = None
    if not os.path.isdir(checkpoint_path):
        os.mkdir(checkpoint_path)
    checkpoints = glob(checkpoint_path + '/*.hdf5')
    if checkpoints and not params.overwrite:
        latest_ckpt = max(checkpoints, key=os.path.getctime)
        completed_epochs = int(
            os.path.splitext(os.path.basename(latest_ckpt).split('epoch_')[1])
            [0].split('_')[0])
        logging.info("Checkpoint exists for epoch {}".format(completed_epochs))
    else:
        completed_epochs = 0

    # generate dataset objects for model inputs
    train_inputs = patch_input_fn(params, mode='train')
    eval_inputs = patch_input_fn(params, mode='eval')

    # Check for existing model and load if exists, otherwise create from scratch
    if latest_ckpt and not params.overwrite:
        logging.info("Creating the model to resume checkpoint")
        model = model_fn(
            params
        )  # recreating the model from scratch may be necessary if a custom loss function is used
        logging.info(
            "Loading model weights checkpoint file {}".format(latest_ckpt))
        model.load_weights(latest_ckpt)
    else:
        # Define the model from scratch
        logging.info("Creating the model...")
        model = model_fn(params)

    # SET CALLBACKS FOR TRAINING FUNCTION

    # define learning rate schedule callback for model
    learning_rate = LearningRateScheduler(
        learning_rate_picker(params.learning_rate, params.learning_rate_decay))

    # checkpoint save callback
    if not os.path.isdir(checkpoint_path):
        os.mkdir(checkpoint_path)
    # save validation loss in name if evaluation files are passed, else use train loss
    if params.train_fract < 1.:
        ckpt = os.path.join(checkpoint_path,
                            'epoch_{epoch:02d}_valloss_{val_loss:.4f}.hdf5')
    else:
        ckpt = os.path.join(checkpoint_path,
                            'epoch_{epoch:02d}_trainloss_{loss:.4f}.hdf5')
    checkpoint = ModelCheckpoint(ckpt,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=False,
                                 mode='auto',
                                 save_freq='epoch')

    # tensorboard callback
    tensorboard = TensorBoard(
        log_dir=params.model_dir,
        histogram_freq=0,
        write_graph=True,
        write_images=False,
        update_freq=(params.samples_per_epoch // params.batch_size) //
        100,  # write losses/metrics 100x per epoch
        profile_batch=2,
        embeddings_freq=0,
        embeddings_metadata=None)

    # combine callbacks for the model
    train_callbacks = [learning_rate, checkpoint, tensorboard]

    # TRAINING
    logging.info("Training for {} total epochs starting at epoch {}".format(
        params.num_epochs, completed_epochs + 1))
    model.fit(train_inputs,
              epochs=params.num_epochs,
              initial_epoch=completed_epochs,
              steps_per_epoch=params.samples_per_epoch // params.batch_size,
              callbacks=train_callbacks,
              validation_data=eval_inputs,
              shuffle=False,
              verbose=1)
    logging.info(
        "Successfully trained model for {} epochs ({} total epochs)".format(
            params.num_epochs - completed_epochs, params.num_epochs))
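Example #7 builds its schedule with `learning_rate_picker(params.learning_rate, params.learning_rate_decay)`, a helper that is not shown. A possible sketch of such a helper is below; the exponential-decay behaviour and the assumption that `params.learning_rate` is a list whose first entry is the initial rate (as the commented-out `params.learning_rate[0]` suggests) are guesses, not taken from the original code.

def learning_rate_picker(learning_rate, learning_rate_decay):
    """Return an epoch -> learning-rate function for tf.keras.callbacks.LearningRateScheduler."""
    initial_rate = learning_rate[0]

    def schedule(epoch, lr=None):
        # simple per-epoch exponential decay (illustrative)
        return initial_rate * (learning_rate_decay ** epoch)

    return schedule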
Example #8
    # Get paths for vocabularies and dataset
    path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
    params.vocab_path = path_vocab
    path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
    path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')
    # Load Vocabularies
    vocab = tf.contrib.lookup.index_table_from_file(
        path_vocab, num_oov_buckets=num_oov_buckets, key_column_index=0)

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_queries = load_dataset_from_text(path_test_queries, vocab, params)
    test_articles = load_dataset_from_text(path_test_articles, vocab, params)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.test_size
    params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

    # Create iterator over the test set
    inputs = input_fn('eval', test_queries, test_articles, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', inputs, params, reuse=False)
    logging.info("- done.")

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #9
char_vocab = tf.contrib.lookup.index_table_from_file(
    params.vocab_fname, num_oov_buckets=params.num_oov_buckets)
char_vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
    params.vocab_fname, default_value='<unk>')
vocab_size = char_vocab.size()

params.pad_token_id = char_vocab.lookup(tf.constant(params.pad_token))
params.start_token_id = char_vocab.lookup(tf.constant(params.start_token))
params.end_token_id = char_vocab.lookup(tf.constant(params.end_token))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    params.vocab_size_val = vocab_size.eval()

train_dataset = load_dataset_from_file(params.train_fname, char_vocab, params)
train_inputs = input_fn('train', train_dataset, params)
train_model_spec = model_fn('train', train_inputs, params, reuse=False)

test_inputs = test_input_fn('test', char_vocab, params)
test_model_spec = model_fn('test', test_inputs, params, reuse=True)

train_test(train_model_spec, test_model_spec, params, char_vocab_rev)

texts = ['ff', 'av']
preds, pred_logits = test_sess(texts,
                               test_model_spec,
                               params,
                               char_vocab_rev,
                               restore_from=os.path.join(
                                   params.model_dir, 'last_weights'))

for text, t_preds, t_pred_logits in zip(texts, preds, pred_logits):
Example #10
        dataset[split] = dict()
        for field in dataset_fields:
            with open(f'./data/nsmc/{split}/{field}.txt') as f:
                lines = f.readlines()
                dataset[split][field] = tuple([line.strip() for line in lines])

    with open('./params/dataset_params.json') as f:
        data_params = json.load(f)

    with open('./params/model_params.json') as f:
        model_params = json.load(f)

    with open('./params/training_params.json') as f:
        training_params = json.load(f)

    model = model_fn(data_params, model_params)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    training_sentences = input_fn(dataset['train']['sentences'], data_params)
    training_labels = np.asarray(
        [int(label) for label in dataset['train']['labels']])
    dev_sentences = input_fn(dataset['dev']['sentences'], data_params)
    dev_labels = np.asarray([int(label) for label in dataset['dev']['labels']])

    print(training_sentences.shape, training_sentences[1])
    print(training_labels.shape, training_labels[1])
    print(type(dev_sentences), dev_sentences.dtype)

    batch_size = training_params['batch_size']
Example #11
    logging.info("Loading pretrained Word2Vec...")
    word2vec = gensim.models.KeyedVectors.load_word2vec_format(
        args.word2vec_file, binary=True)
    logging.info("- done.")

    logging.info("Building datasets...")
    with open(os.path.join(args.data_dir, "symbols.txt")) as f_symbols:
        symbols = [line.strip() for line in f_symbols]
    train_signal_map = {
        symbol: load_signal(path_train, path_corpus, symbol)
        for symbol in symbols[:2]
    }
    dev_signal_map = {
        symbol: load_signal(path_dev, path_corpus, symbol)
        for symbol in symbols[:2]
    }
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size
    train_inputs = input_fn("train", train_signal_map, word2vec, params)
    eval_inputs = input_fn("eval", dev_signal_map, word2vec, params)
    logging.info("- done.")

    logging.info("Creating the model...")
    train_model_spec = model_fn("train", train_inputs, params)
    eval_model_spec = model_fn("eval", eval_inputs, params, reuse=True)
    logging.info("- done.")

    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)
Example #12
    tf.set_random_seed(230)

    # TODO: Load the parameters
    config_args = parse_args()

    # Create the inputs data pipeline
    data_dir = 'dataset/cone/'
    model_dir = 'experiments/basic_model'
    restore_from = 'best_weights'
    image_dir = os.path.join(data_dir, 'Images')
    label_dir = os.path.join(data_dir, 'Labels')

    test_filenames, test_labels = get_filenames_and_labels(
        image_dir, label_dir, 'test')

    # create ssd labels
    test_size = len(test_filenames)

    preset = get_preset_by_name('ssdmobilenet160')
    test_labels = create_labels(preset, test_size, 2, test_labels)

    print("[INFO] Test labels Shape:", test_labels.shape)

    # Create the two iterators over the two datasets
    test_inputs = input_fn(False, test_filenames, test_labels, config_args)

    # Define the model
    model_specs = model_fn('eval', test_inputs, preset, config_args)

    evaluate(model_specs, model_dir, config_args, restore_from)
Example #13
def predict(params, pred_dirs, out_dir, mask=None, checkpoint='last'):

    # load latest checkpoint
    checkpoint_path = os.path.join(params.model_dir, 'checkpoints')
    checkpoints = glob(checkpoint_path + '/*.hdf5')
    if checkpoints:
        # load best or last checkpoint
        # determine last by timestamp
        if checkpoint == 'last':
            ckpt = max(checkpoints, key=os.path.getctime)
        # determine best by minimum loss value in filename
        elif checkpoint == 'best':
            try:
                vals = [
                    float(item[0:-5].split('_')[-1]) for item in checkpoints
                ]
                ckpt = checkpoints[np.argmin(vals)]
            except (ValueError, IndexError):
                line1 = "Could not determine 'best' checkpoint based on checkpoint filenames. "
                line2 = "Use 'last' or pass a specific checkpoint filename to the checkpoint argument."
                logging.error(line1 + line2)
                raise ValueError(line1 + line2)
        elif os.path.isfile(
                os.path.join(params.model_dir,
                             "checkpoints/{}.hdf5".format(checkpoint))):
            ckpt = os.path.join(params.model_dir,
                                "checkpoints/{}.hdf5".format(checkpoint))
        else:
            raise ValueError("Did not understand checkpoint value: {}".format(
                args.checkpoint))
        # net_builder input layer uses train_dims, so set these to infer dims to allow different size inference
        params.train_dims = params.infer_dims
        # batch size for inference is hard-coded to 1
        params.batch_size = 1
        # recreate the model using infer dims as input dims
        logging.info("Creating the model...")
        model = model_fn(params)
        # load weights from last checkpoint
        logging.info("Loading '{}' checkpoint from {}...".format(
            checkpoint, ckpt))
        model.load_weights(ckpt)
    else:
        raise ValueError(
            "No model checkpoints found at {}".format(checkpoint_path))

    # infer directories in a loop
    niis_out = []
    for pred_dir in pred_dirs:
        # define expected output file name to check if output prediction already exists
        model_name = os.path.basename(params.model_dir)
        name_prefix = os.path.basename(pred_dir)
        pred_out = os.path.join(
            out_dir, name_prefix + '_predictions_' + model_name + '.nii.gz')
        # if output doesn't already exist, then predict and make nii
        if not os.path.isfile(pred_out):
            # Create the inference dataset structure
            infer_inputs = patch_input_fn(params=params,
                                          mode='infer',
                                          infer_dir=pred_dir)
            # predict
            predictions = model.predict(infer_inputs)
            # save nii
            pred_out = predictions_2_nii(predictions,
                                         pred_dir,
                                         out_dir,
                                         params,
                                         mask=mask)
        else:
            logging.info(
                "Predictions already exist and will not be overwritten: {}".
                format(pred_out))
        # update list of output niis
        niis_out.append(pred_out)

    return niis_out
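A hypothetical call to this predict() helper; the directory names are placeholders and `params` is assumed to come from the same Params/params.json setup used elsewhere in these examples:

niis = predict(params,
               pred_dirs=['data/case_01', 'data/case_02'],
               out_dir='predictions',
               checkpoint='best')
print(niis)  # paths to the generated *_predictions_*.nii.gz files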
Example #14
                            seed=1234,
                            name='train',
                            params=params)
    logging.info("\nReading the TFRecords validation files...")
    validation_inputs = input_fn(tfrecord_dir=os.path.join(
        args.data_dir, 'validation'),
                                 mean_npz=args.preproc_file,
                                 n_images=None,
                                 is_training=False,
                                 seed=1234,
                                 name='validation',
                                 params=params)

    # Define the model
    logging.info("\nCreating the model...")
    train_model_spec = model_fn(train_inputs, params, is_training=True)
    eval_model_spec = model_fn(validation_inputs, params, is_training=False)

    # Train and evaluate the model
    logging.info("\nStarting training for {} epoch(s)".format(
        params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)

    # Create and save the confusion matrices for the train and evaluation datasets
    for mode in ['train', 'validation']:
        logging.info(
            'Creating the confusion matrix for {} data...'.format(mode))

        logging.info(
            '\tRetrieving information from the labels and predictions files')
Example #15
def predict(inp, target, params, restore_from, config=None,\
            model_dir='./ie590_project/experiments/ex1', model_save_dir='./ie590_project/experiments/ex1/model_save/1'):
    """predict target values given input file paths
    Args:
        inp: (list) a string list of image files paths; 2D -> [sample_size, number_of_channels]
        model_spec: (dict) model specifications of tf Ops
        params: (Params or str) Params object or params.joson path
        tar: (list) a float list of target values
        restore_from: (str) ckpt or directory name where ckpts are located for restoring
        ...
    Return:
        out: (list) a list of precicted target values; have exactly same dimension as target
    """

    assert len(inp) == len(target)

    if type(params) is str:
        assert os.path.isfile(
            params), "params.json does not exist at {}".format(params)
        params = Params(params)
        params.load(params.update)  # load parameters
    params.inp_size = len(inp)

    set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Creating the dataset...")
    inputs = input_fn(False, inp, target, params)

    logging.info("Creating the model...")
    model_spec = model_fn(False, inputs, params)

    logging.info("Calculating predictions...")
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(model_spec['variable_init_op'])

        save_path = os.path.join(model_save_dir, restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(
                save_path
            )  # If restore_from is a directory, get the latest ckpt
        saver.restore(sess, save_path)

        num_steps = (params.inp_size + params.batch_size -
                     1) // params.batch_size

        sess.run([iterator_init_op, metrics_init_op])

        if len(np.shape(target)) == 1:
            out = np.empty(np.shape(target))[:, np.newaxis]
        else:
            out = np.empty(np.shape(target))
        for i in tqdm(range(num_steps)):
            _, predictions_eval = sess.run([update_metrics_op, predictions])
            if i < num_steps - 1:
                out[i * params.batch_size:(i + 1) *
                    params.batch_size, :] = predictions_eval
            else:
                out[i * params.batch_size:, :] = predictions_eval

    return out
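A hypothetical call to the predict() above; the image paths, targets, and params.json path are placeholders:

image_paths = ['images/sample_000.jpg', 'images/sample_001.jpg']
targets = [0.12, 0.87]
preds = predict(image_paths, targets,
                params='./ie590_project/experiments/ex1/params.json',
                restore_from='last_weights')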
Example #16
    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Get paths for dataset
    path_test_prices = os.path.join(args.data_dir, 'test_inputs.pkl')
    path_test_deltas = os.path.join(args.data_dir, 'test_labels.pkl')

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    test_prices, test_deltas = load_prices_and_deltas(path_test_prices,
                                                      path_test_deltas, params)

    # Create the two iterators over the two datasets
    test_inputs = input_fn('test', test_prices, test_deltas, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('test', test_inputs, params, reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #17
        test_flow_filenames = construct_optical_flow_filenames(
            test_filenames, params.volume_depth)
        test_inputs = input_temporal_fn(False, test_flow_filenames,
                                        test_labels, params)
        # Free up the memory
        del test_flow_filenames
        del test_filenames
        del test_labels
    else:
        test_inputs = input_two_stream_fn(False, test_filenames,
                                          test_flow_filenames, test_labels,
                                          params)
        # Free up the memory
        del test_flow_filenames
        del test_filenames
        del test_labels

    # Define the model
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params, args.stream)
    test_model_spec = model_fn('test',
                               test_inputs,
                               params,
                               args.stream,
                               reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, test_model_spec, args.model_dir,
                       params, args.restore_from)
Example #18
eras = tf.contrib.lookup.index_table_from_file(path_era_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_eras = load_dataset_from_text(path_eras, eras, isLabels=True)

# Specify other parameters for the dataset and the model
params_era.id_pad_word = words.lookup(tf.constant(params_era.pad_word))
params_era.id_pad_tag = words.lookup(tf.constant(params_era.pad_tag))

# Create the iterator over the test set
inputs_era = input_fn('eval', reviews, review_eras, params_era)

# Define the model
print('Creating era models...')
model_spec_era = model_fn('eval', inputs_era, params_era, reuse=False)
print('Done')

print(era_model_path)
print(path_words)
print(path_era_tags)
print(path_reviews)
print(path_eras)
print(os.path.join(args.model_dir, args.restore_from))

# Evaluate the model...
# evaluate(model_spec_era, args.model_dir, params_era, args.restore_from)

# initialize saver to restore model
saver = tf.train.Saver()
Example #19
        if f.endswith('.jpg')
    ]
    eval_filenames = [
        os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
        if f.endswith('.jpg')
    ]

    # Labels will be between 0 and 5 included (6 classes in total)
    train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
    eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]
    # print("train_labels = "+str(train_labels))
    # print("eval_labels ="+str(eval_labels))

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = len(train_filenames)
    params.eval_size = len(eval_filenames)

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames, train_labels, params)
    eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

    # Define the model
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)
Example #20
# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_sentiments = load_dataset_from_text(path_sentiments, sentiments, isLabels=True)

# Specify other parameters for the dataset and the model
params_sentiment.id_pad_word = words.lookup(tf.constant(params_sentiment.pad_word))
params_sentiment.id_pad_tag = words.lookup(tf.constant(params_sentiment.pad_tag))


# Create the iterator over the test set
inputs_sentiment = input_fn('eval', reviews, review_sentiments, params_sentiment)

# Define the model
print('Creating sentiment and era models...')
model_spec_sentiment = model_fn('eval', inputs_sentiment, params_sentiment, reuse=False)
print('Done')

# Evaluate the model... 
# evaluate(model_spec_sentiment, args.model_dir, params_sentiment, args.restore_from)

# initialize saver to restore model
saver = tf.train.Saver()

with tf.Session() as sess:
	# Initialize lookup tables for both models
	sess.run(model_spec_sentiment['variable_init_op'])

	# Reload weights from the weights subdirectory
	save_path = os.path.join(args.model_dir, args.restore_from)
	if os.path.isdir(save_path):
Example #21
                continue
            hparam_spec = params[k]
            if hparam_spec["scale"] == "log":
                val = 10**np.random.uniform(*hparam_spec["range"])
            elif hparam_spec["scale"] == "linear":
                val = np.random.uniform(*hparam_spec["range"])
            ret[k.replace("_hparam", "")] = val
        yield ret


if __name__ == "__main__":
    params = params_util.load_params()
    tf.set_random_seed(230)
    with open("data/vocab.json") as f:
        vocab_json = json.load(f)
    vocab_list = vocab_as_sorted_list(vocab_json)
    vocab = tf.contrib.lookup.index_table_from_tensor(
        tf.constant(vocab_list), default_value=params["vocab_unk_idx"])

    glove_weights = np.load("data/glove.npy")

    tweets, lens = load_tweets_naive("data/dev/text.npy", params["max_len"])
    labels = load_labels_naive("data/dev/labels.npy")

    inputs = make_inputs(vocab, glove_weights, params["max_len"])
    train_model = model_fn("train", inputs, params)
    eval_model = model_fn("eval", inputs, params, reuse=True)

    if params.get("restore"):
        print(params)
        evaluate_model(inputs, params, eval_model, params["test_set_size"])
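The sampling generator at the top of Example #21 expects hyperparameter specs keyed by names ending in "_hparam", each carrying a "scale" and a "range". An illustrative shape for such a spec (the keys and ranges here are made up):

params = {
    "learning_rate_hparam": {"scale": "log", "range": [-5, -2]},   # sampled as 10**uniform(-5, -2)
    "dropout_hparam": {"scale": "linear", "range": [0.2, 0.6]},    # sampled as uniform(0.2, 0.6)
}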
Example #22
    ][0]

    dev_filenames = os.listdir(dev_data_dir)
    dev_features_data = [
        os.path.join(dev_data_dir, f) for f in dev_filenames if f == 'X.npy'
    ][0]
    dev_labels_data = [
        os.path.join(dev_data_dir, f) for f in dev_filenames if f == "Y.npy"
    ][0]

    # Specify the sizes of the dataset we train on and evaluate on
    train_data_loaded = np.load(train_features_data, mmap_mode='r')
    params.train_size = train_data_loaded.shape[0]

    dev_data_loaded = np.load(dev_features_data, mmap_mode='r')
    params.eval_size = dev_data_loaded.shape[0]

    # Create the two iterators over the two datasets
    train_inputs = input_fn(train_features_data, train_labels_data, params)
    dev_inputs = input_fn(dev_features_data, dev_labels_data, params)

    # Define the model
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    dev_model_spec = model_fn('eval', dev_inputs, params, reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, dev_model_spec, args.model_dir,
                       params, args.restore_from)