Exemplo n.º 1
0
    def get_mat_id(self, mat_id_name='mat_id'):
        """
        Return the material ID array of the underlying mesh elements.

        The array is read from the cell scalars, under the name
        `mat_id_name`, of the first output of `self.source`. When no source
        is set, None is returned.
        """
        if self.source is None:
            return None

        manager = DatasetManager(dataset=self.source.outputs[0])
        return manager.cell_scalars[mat_id_name]
Exemplo n.º 2
0
    def test_should_print_ascii(self):
        self.maxDiff = None
        result = """+---------------------+------------+-----------------------------------------------------------------------------+
|     description     | identifier |                                    source                                   |
+---------------------+------------+-----------------------------------------------------------------------------+
|  my little dataset  |  one_test  | https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv |
| my little dataset 2 |  two_test  | https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv |
+---------------------+------------+-----------------------------------------------------------------------------+"""
        data = DatasetManager("./tests/resources/multiple_data")
        printer = Printer(data.get_datasets())
        self.assertEqual(result, printer.__repr__())
Exemplo n.º 3
0
    def test_should_read_yaml_from_dir(self):
        """Datasets loaded from ``one_data`` match the expected mapping."""
        manager = DatasetManager("./tests/resources/one_data")
        one_test_config = {
            "source": "http://source/teste",
            "description": "my little dataset",
        }
        self.assertDictEqual(manager.get_datasets(),
                             {"one_test": one_test_config})
Exemplo n.º 4
0
    def test_should_get_dataset(self):
        """``get_dataset("local_test")`` returns the expected configuration."""
        expected_config = {
            "source": "./tests/resources/local_data/train.csv",
            "description": "my little dataset local",
        }
        manager = DatasetManager("./tests/resources/local_data")
        self.assertDictEqual(manager.get_dataset("local_test"),
                             expected_config)
Exemplo n.º 5
0
def main():
    """
    Train a MaskRCNN model on one part of 'ade20k_train' and run prediction
    on a random subset of the other part.

    The dataset is split with ratio 0.2, the model is trained from scratch
    (``weights=None``) on the first part, and the resulting weights are used
    to predict on 1000 randomly chosen items of the second part. Model
    output goes to the ``workspace`` directory (resolved to an absolute
    path). Nothing is returned.
    """
    dataset_manager = DatasetManager()

    # Named once here and actually passed to train() below; previously the
    # variable existed but a literal 10 was hard-coded in the call.
    epochs = 10
    output_dir = os.path.abspath("workspace")
    model = MaskRCNN(output_dir)

    ratio = 0.2
    datasetA, datasetB = dataset_manager.split_dataset('ade20k_train',
                                                       ratio=ratio)
    weights = model.train(datasetA, weights=None, epochs=epochs)
    small_datasetB = dataset_manager.random_subset(datasetB, 1000)
    # The prediction result was never used by the original code, so it is
    # no longer bound to a local name.
    model.predict(small_datasetB, weights)
Exemplo n.º 6
0
 def test_should_remove_dataset(self):
     """Create a dataset, then remove it, checking its YAML file each time."""
     identifier = "data_name"
     manager = DatasetManager(self.trash_dir, fs=self.os)
     manager.create_dataset(identifier=identifier,
                            description="description",
                            source="/tmp/test.csv")
     yaml_path = "{}/{}.yaml".format(self.trash_dir, identifier)

     # After creation: the YAML file exists and the directory has 2 entries.
     self.assertTrue(os.path.isfile(yaml_path))
     self.assertEqual(len(os.listdir(self.trash_dir)), 2)

     manager.remove_dataset(identifier)

     # After removal: the YAML file is gone and one entry is left.
     self.assertFalse(os.path.isfile(yaml_path))
     self.assertEqual(len(os.listdir(self.trash_dir)), 1)
Exemplo n.º 7
0
 def setUp(self):
     """Load the pickled splits and JSON files, then build the manager."""
     def _unpickle(path):
         with open(path, 'rb') as handle:
             return pickle.load(handle)

     def _read_json(path):
         with open(path) as json_data:
             return json.load(json_data)

     self.training_dict = _unpickle('training_set_list.pickle')
     self.validation_dict = _unpickle('validation_set_list.pickle')
     self.test_dict = _unpickle('test_set_list.pickle')
     self.genres = _read_json('genres.json')
     self.dataset = _read_json('labels.json')
     self.dataset_manager = DatasetManager(self.training_dict,
                                           self.validation_dict,
                                           self.test_dict, self.genres,
                                           self.dataset)
     self.batch_size = 50
Exemplo n.º 8
0
def evaluate(experiment_name, step=''):
    """
    Restore a saved model and log its metrics on the test batches.

    The checkpoint is looked up under ``CURRENT_DIR/../checkpoint/
    <experiment_name>``. Predictions for every batch yielded by
    ``DatasetManager.get_test_by_batch`` are collected, then a
    classification report and a confusion matrix are written to the log.

    :param experiment_name: name of the checkpoint sub-directory.
    :param step: step of the checkpoint to load (``step-<step>``); with the
        default empty string the latest checkpoint is used.
    """
    logging.info('*' * 50)
    logging.info('RUNNING EVALUATION FOR MODEL: %s', experiment_name)
    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir,
                                              'step-{}'.format(step))
    dataset_manager = DatasetManager()
    dataset_manager.boot()

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        # Entering the session itself (instead of ``.as_default()``) still
        # installs it as the default session, and also closes it on exit.
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            # The tensors do not change between batches: look them up once.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            list_predictions = []
            list_labels = []
            for docs, labels in dataset_manager.get_test_by_batch(
                    batch_size=FLAGS.BATCH_SIZE):
                prediction = sess.run(tf_predictions,
                                      feed_dict={tf_input: docs})
                list_predictions.extend(prediction)
                list_labels.extend(labels)
                logging.debug('-- Prediction length: %s/%s',
                              len(list_predictions),
                              dataset_manager.test_y.shape[0])
            logging.info('-- Report for model: %s', experiment_name)
            logging.info(
                classification_report(y_true=list_labels,
                                      y_pred=list_predictions,
                                      digits=4))
            logging.info(
                confusion_matrix(y_true=list_labels, y_pred=list_predictions))
Exemplo n.º 9
0
def add_subdomains_surface(obj,
                           position,
                           mat_id_name='mat_id',
                           threshold_limits=(None, None),
                           **kwargs):
    """
    Add two semi-transparent surfaces colored by `mat_id_name` cell scalars.

    The first surface (opacity 0.3) is built on a threshold filter; each
    non-None entry of `threshold_limits` sets the lower/upper threshold,
    shifted inwards by 0.1. The second surface (opacity 0.2) is built on the
    unfiltered data. Both use the 'Blues' lookup table and are placed at
    `position`. `kwargs` is accepted but not used.

    Returns the tuple (thresholded surface, full surface).
    """
    cell_ids = DatasetManager(dataset=obj.outputs[0]).cell_scalars[mat_id_name]
    id_min, id_max = cell_ids.min(), cell_ids.max()

    # Two separate active-attribute filters: one feeds the full surface,
    # the other feeds the threshold filter.
    full_attr = mlab.pipeline.set_active_attribute(obj)
    full_attr.cell_scalars_name = mat_id_name

    thresh_attr = mlab.pipeline.set_active_attribute(obj)
    thresh_attr.cell_scalars_name = mat_id_name

    threshold = mlab.pipeline.threshold(thresh_attr)
    threshold.threshold_filter.progress = 1.0
    if threshold_limits[0] is not None:
        threshold.lower_threshold = threshold_limits[0] + 0.1
    if threshold_limits[1] is not None:
        threshold.upper_threshold = threshold_limits[1] - 0.1

    threshold.auto_reset_lower = False
    threshold.auto_reset_upper = False

    thresh_surface = mlab.pipeline.surface(threshold, opacity=0.3)
    thresh_surface.actor.actor.position = position

    lut_manager = thresh_surface.parent.scalar_lut_manager
    lut_manager.lut_mode = 'Blues'
    # The lookup table is reversed when the ID range spans exactly one.
    if id_max - id_min == 1:
        lut_manager.reverse_lut = True

    full_surface = mlab.pipeline.surface(full_attr, opacity=0.2)
    full_surface.actor.actor.position = position
    full_surface.parent.scalar_lut_manager.lut_mode = 'Blues'

    return thresh_surface, full_surface
Exemplo n.º 10
0
def main():
    """
    Train on one part of 'ade20k_train', then retrain forever on filtered
    predictions for the other part.

    Each iteration predicts on 100 random items of the second part, filters
    the predictions through ``annotator``, builds a new dataset from the
    second part plus the kept annotations, and trains one more epoch. The
    loop has no exit condition.
    """
    workspace = os.path.abspath("workspace")

    manager = DatasetManager()
    network = MaskRCNN(workspace)

    labelled, unlabelled = manager.split_dataset('ade20k_train', ratio=0.2)

    weights = network.train(labelled, weights=None, epochs=10)

    while True:
        sample = manager.random_subset(unlabelled, 100)
        kept = annotator.filter(network.predict(sample, weights))
        retrain_set = manager.create_dataset_with_new_annotations(
            unlabelled, kept)

        weights = network.train(retrain_set, weights, epochs=1)
Exemplo n.º 11
0
    def test_should_create_dataset_with_custom_data(self):
        """Create a dataset and check its YAML file and loaded configuration."""
        identifier = "data_name_custom"
        description = "description"
        source = "/tmp/test.csv"

        manager = DatasetManager(self.trash_dir, fs=self.os)
        manager.create_dataset(identifier=identifier,
                               description=description,
                               source=source)

        yaml_path = "{}/{}.yaml".format(self.trash_dir, identifier)
        self.assertTrue(self.os.isfile(yaml_path))
        self.assertEqual(len(os.listdir(self.trash_dir)), 2)

        loaded = manager.get_datasets()
        self.assertEqual(list(loaded.keys()), [identifier])

        config = loaded.get(identifier)
        self.assertEqual(config["description"], description)
        self.assertEqual(config["source"], source)
Exemplo n.º 12
0
    def test_should_read_multiple_yaml_from_dir(self):
        """The identifiers loaded from ``multiple_data`` are the two expected.

        Only the dataset identifiers are compared (sorted, so the order in
        which they are loaded does not matter). The dict of full
        configurations that used to be built here was overwritten before
        any assertion used it and has been removed.
        """
        data = DatasetManager("./tests/resources/multiple_data", fs=self.os)
        result = sorted(data.get_datasets().keys())
        expected = ["one_test", "two_test"]
        self.assertListEqual(expected, result)
Exemplo n.º 13
0
def predict(list_docs, experiment_name, step='', batch_size=64):
    """
    Predict labels for `list_docs` with a saved model.

    The documents are preprocessed, converted to vectors, made equal in
    length (``model_v6.SENTENCE_LENGTH_MAX``) and fed to the restored graph
    in slices of `batch_size`.

    :param list_docs: documents to classify.
    :param experiment_name: name of the checkpoint sub-directory under
        ``CURRENT_DIR/../checkpoint``.
    :param step: step of the checkpoint to load (``step-<step>``); with the
        default empty string the latest checkpoint is used.
    :param batch_size: number of vectors fed per ``sess.run`` call.
    :return: list of predictions mapped through
        ``dataset_manager.LABEL_UNMAPPING``.
    """
    logging.info('*' * 50)
    logging.info('RUNNING PREDICT FOR MODEL: %s', experiment_name)
    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir, 'step-{}'.format(step))
    dataset_manager = DatasetManager()
    dataset_manager.boot()

    list_preprocessed_sentences = preprocessor.preprocess(list_docs)

    list_vecs = dataset_manager.text2vec.doc_to_vec(list_preprocessed_sentences)
    # Echo of the round-tripped documents, kept from the original code.
    print(dataset_manager.text2vec.vec_to_doc(list_vecs))
    list_vecs = dataset_manager.equalize_vector_length_to_np(list_vectors=list_vecs,
                                                             max_length=model_v6.SENTENCE_LENGTH_MAX)

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s', interesting_checkpoint)
        saver = tf.train.import_meta_graph('{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s', interesting_checkpoint)

        # Entering the session itself (instead of ``.as_default()``) still
        # installs it as the default session, and also closes it on exit.
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s', interesting_checkpoint)

            # The tensors do not change between batches: look them up once.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            # Stepping by batch_size never produces an empty slice, unlike
            # ``range(len // batch_size + 1)``, which fed an empty batch
            # whenever the length was a multiple of batch_size (or zero).
            batch_starts = range(0, len(list_vecs), batch_size)
            logging.info('There will be %s steps', len(batch_starts))

            list_predictions = []
            for start in batch_starts:
                prediction = sess.run(tf_predictions, feed_dict={
                    tf_input: list_vecs[start: start + batch_size]
                })
                list_predictions.extend([dataset_manager.LABEL_UNMAPPING[p] for p in prediction])

            return list_predictions
Exemplo n.º 14
0
    def test_should_print_html(self):
        self.maxDiff = None
        result = """<table>
    <tr>
        <th>description</th>
        <th>identifier</th>
        <th>source</th>
    </tr>
    <tr>
        <td>my little dataset</td>
        <td>one_test</td>
        <td>https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv</td>
    </tr>
    <tr>
        <td>my little dataset 2</td>
        <td>two_test</td>
        <td>https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv</td>
    </tr>
</table>"""
        data = DatasetManager("./tests/resources/multiple_data")
        printer = Printer(data.get_datasets())
        self.assertEqual(result, printer._repr_html_())
Exemplo n.º 15
0
    def test_should_create_dataset(self):
        """Create a dataset and check its YAML file and loaded configuration."""
        identifier = "data_name"
        description = "description"
        source = "/tmp/test.csv"

        manager = DatasetManager(self.trash_dir, fs=self.os)
        manager.create_dataset(identifier=identifier,
                               description=description,
                               source=source)

        loaded = manager.get_datasets()
        config = loaded.get(identifier)

        yaml_path = "{}/{}.yaml".format(self.trash_dir, identifier)
        self.assertTrue(self.os.isfile(yaml_path))
        self.assertEqual(len(self.os.listdir(self.trash_dir)), 2)

        first_key = list(loaded.keys())[0]
        self.assertEqual(first_key, identifier)
        self.assertEqual(config.get("description"), description)
        self.assertEqual(config.get("source"), source)
Exemplo n.º 16
0
    def create_source(self):
        """
        Create a VTK source from data in a SfePy-supported file.

        Returns
        -------
        src : VTKDataSource
            The source wrapping the newly created dataset.

        Notes
        -----
        All data need to be set here, otherwise time stepping will not
        work properly - data added by user later will be thrown away on
        time step change.
        """
        if self.io is None:
            self.read_common(self.filename)

        dataset = self.create_dataset()

        # A ValueError from read_data() is treated as "no data for this
        # step": the source is then created from the bare dataset.
        try:
            out = self.io.read_data(self.step)
        except ValueError:
            out = None

        if out is not None:
            self.add_data_to_dataset(dataset, out)

        if self.mat_id_name is not None:
            mat_id = self.mesh.cmesh.cell_groups
            if self.single_color:
                # Collapse all IDs above the minimum to the maximum, on a
                # copy: assigning into the array obtained from the mesh
                # would overwrite the mesh's own cell groups.
                mat_id = mat_id.copy()
                rm = mat_id.min(), mat_id.max()
                mat_id[mat_id > rm[0]] = rm[1]

            dm = DatasetManager(dataset=dataset)
            dm.add_array(mat_id, self.mat_id_name, 'cell')

        src = VTKDataSource(data=dataset)
        return src
Exemplo n.º 17
0
    def test_should_get_dataset_unknown(self):
        """``get_dataset("unknown_test")`` is expected to raise IOError."""
        manager = DatasetManager("./tests/resources/local_data")
        self.assertRaises(IOError, manager.get_dataset, "unknown_test")
def main():
    """
    Restore a saved AlexNet checkpoint, report the mean of
    ``mean_average_precision`` over the test batches, then print the genre
    scores predicted for one image, highest score first.
    """
    def _unpickle(path):
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def _read_json(path):
        with open(path) as json_data:
            return json.load(json_data)

    # Dataset manager inputs
    training_set = _unpickle('training_set_list.pickle')
    validation_set = _unpickle('validation_set_list.pickle')
    test_set = _unpickle('test_set_list.pickle')
    genres = _read_json('genres.json')
    labels = _read_json('labels.json')
    dataset_manager = DatasetManager(training_set, validation_set, test_set,
                                     genres, labels)

    batch_size = 1
    n_classes = 26

    # Graph inputs (the label placeholder ``y`` is created but never fed)
    x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, n_classes])
    keep_var = tf.placeholder(tf.float32)

    # Network architecture
    pred = Model.alexnet(x, keep_var)

    init = tf.global_variables_initializer()

    # Saver used to restore the model checkpoint
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        saver.restore(
            sess, "saved_models/models/model_dropout05_mean_square_error.ckpt")
        print('Model Restored')

        # Average the per-batch score over the test batches.
        n_batches = int(len(dataset_manager.test_list) / batch_size) + 1
        map_total = 0.
        batches_seen = 0
        for _ in range(n_batches):
            batch_tx, batch_ty = dataset_manager.next_batch(batch_size, 'test')
            batch_output = sess.run(pred,
                                    feed_dict={x: batch_tx, keep_var: 1})
            map_total += mean_average_precision(batch_output, batch_ty)
            batches_seen += 1
        map_total /= batches_seen
        print("Global Test Accuracy = {:.4f}".format(map_total))

        # Single image: resize, cast to float32, subtract per-channel offsets.
        img = cv2.imread('saved_models/images_tests/yellow.jpg')
        img = cv2.resize(img, (227, 227)).astype(np.float32)
        img -= np.array([104., 117., 124.])
        print(img)
        image_feed = {x: np.reshape(img, (1, 227, 227, 3)), keep_var: 1}
        test_output = sess.run(pred, feed_dict=image_feed)

        score_dict = dict(zip(genres, test_output[0]))
        ranked = sorted(score_dict.items(), key=lambda item: item[1])
        ranked.reverse()
        print(ranked)
Exemplo n.º 19
0
    def add_data_to_dataset(self, dataset, data):
        """
        Add point and cell data to the dataset.

        Parameters
        ----------
        dataset : object
            The dataset wrapped in a `DatasetManager` to receive the arrays.
        data : dict
            Mapping of array names to objects with `mode` ('vertex' or
            'cell') and `data` (array) attributes. Items with any other
            mode are ignored.

        Raises
        ------
        ValueError
            If a vertex array does not have 1, 2 or 3 columns, or a cell
            array has a shape that none of the handled layouts matches.

        Notes
        -----
        Arrays with two components per item are padded with zeros to three
        (vectors) or nine (tensors) columns before being added.
        """
        dim = self.dim
        # Integer division keeps `sym` an int on Python 3 as well.
        sym = (dim + 1) * dim // 2

        dm = DatasetManager(dataset=dataset)
        # items() works on both Python 2 and 3 (iteritems() is 2-only).
        for key, val in data.items():
            vd = val.data
            if val.mode == 'vertex':
                if vd.shape[1] == 1:
                    aux = vd.reshape((vd.shape[0],))

                elif vd.shape[1] == 2:
                    zz = nm.zeros((vd.shape[0], 1), dtype=vd.dtype)
                    aux = nm.c_[vd, zz]

                elif vd.shape[1] == 3:
                    aux = vd

                else:
                    # The shape must be wrapped in a 1-tuple: `% vd.shape`
                    # would spread its items over the single `%s` and fail
                    # with a TypeError instead of this message.
                    raise ValueError('unknown vertex data format! (%s)'
                                     % (vd.shape,))

                dm.add_array(aux, key, 'point')

            elif val.mode == 'cell':
                ne, _, nr, nc = vd.shape
                if (nr == 1) and (nc == 1):
                    aux = vd.reshape((ne,))

                elif (nr == dim) and (nc == 1):
                    if dim == 3:
                        aux = vd.reshape((ne, dim))
                    else:
                        zz = nm.zeros((vd.shape[0], 1), dtype=vd.dtype)
                        aux = nm.c_[vd.squeeze(), zz]

                elif (((nr == sym) or (nr == (dim * dim))) and (nc == 1)) \
                         or ((nr == dim) and (nc == dim)):
                    vd = vd.squeeze()

                    if dim == 3:
                        if nr == sym:
                            aux = vd[:,[0,3,4,3,1,5,4,5,2]]
                        elif nr == (dim * dim):
                            aux = vd[:,[0,3,4,6,1,5,7,8,2]]
                        else:
                            aux = vd.reshape((vd.shape[0], dim*dim))
                    else:
                        zz = nm.zeros((vd.shape[0], 1), dtype=vd.dtype)
                        if nr == sym:
                            aux = nm.c_[vd[:,[0,2]], zz, vd[:,[2,1]],
                                        zz, zz, zz, zz]
                        elif nr == (dim * dim):
                            aux = nm.c_[vd[:,[0,2]], zz, vd[:,[3,1]],
                                        zz, zz, zz, zz]
                        else:
                            aux = nm.c_[vd[:,0,[0,1]], zz, vd[:,1,[0,1]],
                                        zz, zz, zz, zz]

                else:
                    # Previously an unmatched shape fell through and the
                    # leftover integer from the shape unpacking was added
                    # as the array; fail like the vertex branch instead.
                    raise ValueError('unknown cell data format! (%s)'
                                     % (vd.shape,))

                dm.add_array(aux, key, 'cell')
Exemplo n.º 20
0
from flask import Flask, send_file
from flask_restful import Resource, Api, reqparse
from flask_cors import CORS
import numpy as np
from data_backend import Dataset as HDF_Dataset
from dataset_manager import DatasetManager
from thumbnailer import Thumbnailer
from utils import merge_overlapping_filters

DATASET_PATH = "./datasets"
dataset_manager = DatasetManager(DATASET_PATH)
API_BASE_STR = "/api/v1"

# Init thumbnails (clean directory)
thumbnailer = Thumbnailer("./thumbnails")
thumbnailer.clean();

dataset_list = []
for dset_index, name in enumerate(dataset_manager.get_dataset_names()):
    dset = HDF_Dataset(DATASET_PATH, name)
    dataset_list.append({
        "id": dset_index,
        "name": name,
        "device": {
            "name": dset.device.name,
            "version": dset.device.version
        },
        "subsets": [
            {
                "id": subset_index,
                "name": subset,
Exemplo n.º 21
0
def run(experiment_name):
    """
    Build the training graph and train the model, writing summaries and
    checkpoints along the way.

    Train and test summaries go to ``CURRENT_DIR/../summary/train_<name>``
    and ``.../test_<name>``; checkpoints and the embedding projector config
    go to ``CURRENT_DIR/../checkpoint/<name>``. Summaries are written every
    10 global steps and a checkpoint is saved every 200 global steps.

    :param experiment_name: name used for the summary and checkpoint
        directories.
    """
    summary_interval_step = 10
    checkpoint_interval_step = 200
    summary_dir = os.path.join(CURRENT_DIR, '..', 'summary')
    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)

    with tf.Graph().as_default() as gr:
        with tf.variable_scope('input'):
            tf_input = tf.placeholder(dtype=tf.int32,
                                      shape=[None, model.SENTENCE_LENGTH_MAX],
                                      name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        tf_logits = model.inference(tf_input)
        tf_loss = model.loss(tf_logits, tf_labels)

        tf_optimizer, tf_global_step = model.optimize(tf_loss)
        model.measure_acc(tf_logits, tf_labels)

        tf_all_summary = tf.summary.merge_all()

        tf_train_writer = tf.summary.FileWriter(
            logdir=os.path.join(summary_dir, 'train_' + experiment_name),
            graph=gr)
        tf_test_writer = tf.summary.FileWriter(
            logdir=os.path.join(summary_dir, 'test_' + experiment_name),
            graph=gr)
        tf_embedding_writer = tf.summary.FileWriter(logdir=checkpoint_dir)

        try:
            # Visual word embedding
            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            embedding.tensor_name = 'embedding/word_embeddings'  # Reference model_v6.py
            embedding.metadata_path = os.path.join(CURRENT_DIR, 'data',
                                                   DatasetManager.VOCAB_FILE)
            projector.visualize_embeddings(tf_embedding_writer, config)

            saver = tf.train.Saver(max_to_keep=5,
                                   keep_checkpoint_every_n_hours=0.03)

            logging.info('Graph size: %s', utils.count_trainable_variables())

            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.GPU)
            session_config = tf.ConfigProto(
                gpu_options=gpu_options,
                allow_soft_placement=True,
                log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)
            # Entering the session itself (instead of ``.as_default()``)
            # still installs it as the default session, and also closes it
            # on exit.
            with tf.Session(config=session_config) as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                dataset_manager = DatasetManager()
                dataset_manager.boot()

                for docs, labels in dataset_manager.get_batch(
                        batch_size=FLAGS.BATCH_SIZE,
                        number_epochs=FLAGS.NUMBER_EPOCHS):
                    train_feed = {tf_input: docs, tf_labels: labels}
                    _, global_step = sess.run([tf_optimizer, tf_global_step],
                                              feed_dict=train_feed)

                    if global_step % summary_interval_step == 0:
                        logging.debug('Global step: %s', global_step)
                        train_summary_data = sess.run(tf_all_summary,
                                                      feed_dict=train_feed)
                        tf_train_writer.add_summary(train_summary_data,
                                                    global_step=global_step)

                        docs_test, labels_test = dataset_manager.get_test_set(
                            FLAGS.TEST_SIZE, is_shuffled=True)
                        test_summary_data = sess.run(tf_all_summary,
                                                     feed_dict={
                                                         tf_input: docs_test,
                                                         tf_labels: labels_test
                                                     })
                        tf_test_writer.add_summary(test_summary_data,
                                                   global_step=global_step)

                    if global_step % checkpoint_interval_step == 0:
                        if not os.path.exists(checkpoint_dir):
                            os.makedirs(checkpoint_dir)
                        saved_file = saver.save(sess,
                                                save_path=os.path.join(
                                                    checkpoint_dir, 'step'),
                                                global_step=global_step,
                                                write_meta_graph=True)
                        logging.debug('Saving model at %s', saved_file)
        finally:
            # Flush and release the event files even if training fails.
            for writer in (tf_train_writer, tf_test_writer,
                           tf_embedding_writer):
                writer.close()
def run(experiment_name):
    """
    Train a population of models, recording accuracy and L1 scale per step.

    Each of the POPULATION_STEPS rounds copies the first-ranked model into
    the last WORST_THRES models, perturbs the L1 scale of all models past
    the first BEST_THRES, trains every model for ITERATIONS batches, then
    re-ranks the models by accuracy on one further batch. The recorded
    histories are written as JSON to the file ``temp``.

    :param experiment_name: not used by this function.
    """
    BEST_THRES = 3
    WORST_THRES = 3
    POPULATION_STEPS = 500
    ITERATIONS = 100
    POPULATION_SIZE = 10
    accuracy_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    l1_scale_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    best_accuracy_hist = np.zeros((POPULATION_STEPS, ))
    best_l1_scale_hist = np.zeros((POPULATION_STEPS, ))

    with tf.Graph().as_default() as gr:

        with tf.variable_scope('input'):
            tf_input = tf.placeholder(
                dtype=tf.int32,
                shape=[
                    None, model_population_based_tunning.SENTENCE_LENGTH_MAX
                ],
                name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        # The model count must match the first axis of the history arrays,
        # so it comes from POPULATION_SIZE rather than a separate literal.
        models = [
            create_model(
                i, is_included_regularization=FLAGS.IS_INCLUDED_REGULARIZATION)
            for i in range(POPULATION_SIZE)
        ]
        # It will help us with creation of different scope_name for each model
        for index, model in enumerate(models):
            with tf.variable_scope(str(index)):
                model.boot(tf_input, tf_labels)

        logging.info('Graph size: %s', utils.count_trainable_variables())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.GPU)
        session_config = tf.ConfigProto(
            gpu_options=gpu_options,
            allow_soft_placement=True,
            log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)
        # Entering the session itself (instead of ``.as_default()``) still
        # installs it as the default session, and also closes it on exit.
        with tf.Session(config=session_config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            dataset_manager = DatasetManager()
            dataset_manager.boot()

            dataset_generator = dataset_manager.get_batch(
                batch_size=FLAGS.BATCH_SIZE,
                number_epochs=10 * FLAGS.NUMBER_EPOCHS)
            for i in range(POPULATION_STEPS):

                # Copy best
                sess.run([
                    m.get_copy_from_op(models[0])
                    for m in models[-WORST_THRES:]
                ])
                # Perturb others
                sess.run([m.l1_scale_perturb_op for m in models[BEST_THRES:]])
                # Training
                for _ in range(ITERATIONS):
                    docs, labels = next(dataset_generator)
                    sess.run([m.tf_optimizer for m in models],
                             feed_dict={
                                 tf_input: docs,
                                 tf_labels: labels
                             })
                docs, labels = next(dataset_generator)
                # Evaluate
                l1_scales = sess.run({m: m.l1_scale for m in models})
                accuracies = sess.run({m: m.tf_acc
                                       for m in models},
                                      feed_dict={
                                          tf_input: docs,
                                          tf_labels: labels
                                      })
                models.sort(key=lambda m: accuracies[m], reverse=True)
                # Logging
                best_accuracy_hist[i] = accuracies[models[0]]
                best_l1_scale_hist[i] = l1_scales[models[0]]
                for m in models:
                    l1_scale_hist[m.model_id, i] = l1_scales[m]
                    accuracy_hist[m.model_id, i] = accuracies[m]

            # numpy arrays are not JSON serializable: json.dump() raised a
            # TypeError here. Convert them to nested lists before dumping.
            history = {
                'accuracy_hist': accuracy_hist.tolist(),
                'l1_scale_hist': l1_scale_hist.tolist(),
                'best_accuracy_hist': best_accuracy_hist.tolist(),
                'best_l1_scale_hist': best_l1_scale_hist.tolist()
            }
            with open('temp', 'w') as output_f:
                json.dump(history, output_f)
Exemplo n.º 23
0
import sys
from dataset_manager import DatasetManager
from gloss_lookup import GlossLookup

# Instantiate singleton classes
gl = GlossLookup()
dm = DatasetManager()


def menu():
    """Print the main menu: a blank line, a title bar, the options, a rule."""
    bar = 15 * '-'
    options = ['[1] Gloss Lookup', '[2] Dataset manager', '[3] Exit program']

    print()
    print(bar, 'MAIN MENU', bar)
    print('\n'.join(options))
    print(41 * '-')


def glossLookup():
    """
    Prompt for a class number until a known one is entered, then print
    its word.

    The gloss menu is shown before every prompt. Input that is not an
    integer, or an integer for which ``gl.wordExists`` is false, prints the
    "Invalid class" message and prompts again.
    """
    while True:
        gl.menu()
        try:
            answer = int(
                input('Enter the class (an integer between 0 to 999): '))
        except ValueError:
            # Non-numeric input used to crash the program with a traceback;
            # treat it like any other invalid class.
            print('Invalid class. Please enter a valid class.')
            continue

        if gl.wordExists(answer):
            print('Word: ' + gl.searchGlossary(answer))
            return

        print('Invalid class. Please enter a valid class.')


def datasetManager():
    loop = True
params = "pd_fixed_trainratio80_outcome_all_data_singletask"
#params = "lstmsize%s_dropout%s_shared%s_specialized%s"%(lstmsize, dropout, n_shared_layers, n_specialized_layers)
checkpoint_prefix = os.path.join(
    output_dir, "checkpoints/model_%s_%s" % (dataset_name, params))
model_filename = glob.glob("%s*.hdf5" % checkpoint_prefix)[-1]
#model_filename = "code/output_files/models/model_28-1.51.h5"
results_file = os.path.join(
    output_dir, "evaluation_results/results_%s_%s_%s.csv" %
    (cls_method, dataset_name, params))

##### MAIN PART ######

print('Preparing data...')
start = time.time()

dataset_manager = DatasetManager(dataset_name)
data = dataset_manager.read_dataset()
train, test = dataset_manager.split_data(
    data, train_ratio, split=data_split_type
)  # to reproduce results of Tax et al., use 'ordered' instead of 'temporal'

dt_train = dataset_manager.encode_data_with_label_all_data(train)
dt_test = dataset_manager.encode_data_with_label_all_data(test)

if normalize_over == "train":
    dataset_manager.calculate_divisors(dt_train)
elif normalize_over == "all":
    dt_all = dataset_manager.extract_timestamp_features(data)
    dt_all = dataset_manager.extract_duration_features(dt_all)
    dataset_manager.calculate_divisors(dt_all)
else:
Exemplo n.º 25
0
    def test_should_remove_unknown_dataset(self):
        """``remove_dataset("unknown_dataset")`` is expected to raise IOError."""
        manager = DatasetManager("./tests/resources/local_data", fs=self.os)
        self.assertRaises(IOError, manager.remove_dataset, "unknown_dataset")
Exemplo n.º 26
0
def predict(list_sentences,
            output_file,
            experiment_name,
            step='',
            list_labels=None):
    """Run a saved model on sentences and write the predictions to a CSV file.

    The sentences are pre-processed, turned into vectors through the booted
    ``DatasetManager`` and fed to the graph restored from the experiment's
    checkpoint directory. When labels are supplied, a classification report
    is logged and the labels are added to the output.

    Args:
        list_sentences: Raw sentences to classify.
        output_file: Destination path of the CSV written by pandas.
        experiment_name: Name of the sub-directory of ``../checkpoint``
            (relative to ``CURRENT_DIR``) holding the checkpoints.
        step: Checkpoint step to restore (``step-<step>``). The empty string
            selects whatever ``tf.train.latest_checkpoint`` reports.
        list_labels: Optional ground-truth labels. ``None`` (the default) is
            treated as "no labels", exactly like an empty list.

    Returns:
        None. The result is the CSV file written to ``output_file``.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if list_labels is None:
        list_labels = []

    dataset_manager = DatasetManager()
    dataset_manager.boot()
    list_preprocessed_sentences = preprocessor.preprocess(list_sentences)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v1.SENTENCE_LENGTH_MAX)
    list_labels = dataset_manager.convert_labels_to_np(list_labels)
    # len() rather than truthiness: the converted labels may be an array,
    # whose boolean value is ambiguous.
    has_labels = len(list_labels) != 0

    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir,
                                              'step-{}'.format(step))

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        # Entering the Session itself (instead of only `.as_default()`) still
        # installs it as the default session but also closes it on exit, so
        # its resources are released after every call.
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            prediction = sess.run(tf_predictions,
                                  feed_dict={tf_input: list_vecs})

            if has_labels:
                logging.info('-- Report for model: %s', experiment_name)
                logging.info(
                    classification_report(y_true=list_labels,
                                          y_pred=prediction))

            # Columns of the output CSV, in insertion order.
            result_dict = dict()
            result_dict['sentence'] = list_sentences
            result_dict['pre-processed'] = list_preprocessed_sentences
            result_dict[
                'pre-processed_recover'] = dataset_manager.text2vec.vec_to_doc(
                    list_vecs)
            result_dict['predict'] = prediction

            if has_labels:
                result_dict['label'] = list_labels

            pd.DataFrame(result_dict).to_csv(output_file, index=None)
            logging.debug('Saved result at %s', output_file)
def main():
    """Fine-tune the AlexNet model and log progress to files under ``logs/``.

    Loads the pickled train/validation/test lists and the JSON genre/label
    metadata, builds the TensorFlow graph (RMSE loss, RMSProp optimizer),
    then trains for ``training_iters - 1`` steps. Along the way it:

    * appends every step number to the iteration log;
    * every ``local_train_step`` steps logs the loss and mean average
      precision of the current training batch;
    * every ``global_train_step`` steps logs the mean average precision
      averaged over several 'train' batches;
    * every ``global_validation_step`` steps does the same over 'val'
      batches and saves a checkpoint when the score is at least as good as
      the best seen so far.

    A ``logs/finish`` marker file is written once training is over.
    """
    # Load the pickled splits and the JSON metadata for the dataset manager.
    # NOTE(review): pickle.load can execute arbitrary code; these files are
    # presumably produced locally -- do not point this at untrusted data.
    with open('training_set_list.pickle', 'rb') as handle:
        training_set = pickle.load(handle)
    with open('validation_set_list.pickle', 'rb') as handle:
        validation_set = pickle.load(handle)
    with open('test_set_list.pickle', 'rb') as handle:
        test_set = pickle.load(handle)
    with open('genres.json') as json_data:
        genres = json.load(json_data)
    with open('labels.json') as json_data:
        labels = json.load(json_data)

    # Create the three log files, each with a header line. datetime.now() is
    # evaluated separately for each name, so the timestamps can differ
    # slightly between files.
    # NOTE(review): open(..., 'w') does not create the "logs/" directory; it
    # presumably has to exist before running -- confirm.
    log_file_name = str(datetime.now()) + '-logs.txt'
    with open("logs/" + log_file_name, 'w') as log_file:
        log_file.write('Training logs \n')

    iteration_file_name = str(datetime.now()) + '-iteration.txt'
    with open("logs/" + iteration_file_name, 'w') as log_file:
        log_file.write('Training iterations \n')

    best_iteration_file_name = str(datetime.now()) + '-best_iteration.txt'
    with open("logs/" + best_iteration_file_name, 'w') as log_file:
        log_file.write('Best Training iterations \n')

    dataset_manager = DatasetManager(training_set, validation_set, test_set,
                                     genres, labels)
    # Learning params
    learning_rate = 0.001
    batch_size = 50
    # Number of iterations
    training_iters = 1000
    # log the current batch's loss and mean average precision every
    # local_train_step iterations
    local_train_step = 20
    global_validation_step = 25  # evaluate on 'val' batches every global_validation_step iterations
    global_train_step = 100  # evaluate on 'train' batches every global_train_step iterations

    # Network params
    n_classes = 26
    keep_rate = 0.5  # for dropout

    # Graph input. The image placeholder has a fixed batch dimension, so
    # every batch fed to `x` must contain exactly batch_size images.
    x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, n_classes])
    keep_var = tf.placeholder(tf.float32)

    # Model
    pred = Model.alexnet(x, keep_var)  # definition of the network architecture

    # Loss and optimizer: root of the mean squared difference between the
    # targets and the network output.
    # loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
    loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(y, pred))))
    # optimizer = tf.train.GradientDescentOptimizer(
    #    learning_rate=learning_rate).minimize(loss)
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate).minimize(loss)

    # Init
    init = tf.global_variables_initializer()

    # Saver used to store model checkpoints
    saver = tf.train.Saver()

    # Best validation score seen so far; decides when a checkpoint is saved
    max_validation_map = 0

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)

        # Load pretrained model
        # Skip weights from fc8 (fine-tuning)
        load_with_skip('pretrained_alexnet.npy', sess, ['fc8'])

        print('Start training.')
        # step starts at 1 and the loop stops before training_iters, so
        # training_iters - 1 optimisation steps are performed.
        step = 1
        while step < training_iters:
            # print("Iter ", step)
            with open("logs/" + iteration_file_name, 'a') as log_file:
                log_file.write("Iter {} \n".format(step))
            batch_xs, batch_ys = dataset_manager.next_batch(
                batch_size, 'train')
            # One optimisation step with dropout active (keep_rate).
            sess.run(optimizer,
                     feed_dict={
                         x: batch_xs,
                         y: batch_ys,
                         keep_var: keep_rate
                     })

            # Log the status of the batch that was just trained on; the
            # network is evaluated with dropout disabled (keep_var = 1).
            if step % local_train_step == 0:
                local_train_output = sess.run(pred,
                                              feed_dict={
                                                  x: batch_xs,
                                                  keep_var: 1
                                              })
                MAP = mean_average_precision(local_train_output, batch_ys)
                batch_loss = sess.run(loss,
                                      feed_dict={
                                          x: batch_xs,
                                          y: batch_ys,
                                          keep_var: 1.
                                      })
                with open("logs/" + log_file_name, 'a') as log_file:
                    log_file.write("Iter {} Training Loss = {:.4f}, "
                                   "Mean average precision = {:.4f} \n".format(
                                       step, batch_loss, MAP))

            # Log the training score averaged over several 'train' batches.
            # The value logged as "Accuracy" is the mean of the per-batch
            # mean average precision.
            # NOTE(review): these batches come from the same next_batch
            # 'train' source used for optimisation above -- if next_batch is
            # stateful this advances the training stream; confirm.
            if step % global_train_step == 0:
                train_map_global = 0.
                test_count = 0
                # evaluate by groups of batch_size images
                for _ in range(
                        int(len(dataset_manager.training_list) / batch_size) +
                        1):
                    batch_tx, batch_ty = dataset_manager.next_batch(
                        batch_size, 'train')
                    test_output = sess.run(pred,
                                           feed_dict={
                                               x: batch_tx,
                                               keep_var: 1
                                           })
                    MAP = mean_average_precision(test_output, batch_ty)
                    train_map_global += MAP
                    test_count += 1
                train_map_global /= test_count
                with open("logs/" + log_file_name, 'a') as log_file:
                    log_file.write(
                        "Global Training Accuracy = {:.4f} \n".format(
                            train_map_global))

            # Log the validation score averaged over several 'val' batches.
            # NOTE(review): the number of batches is derived from
            # dataset_manager.test_list although the batches are drawn with
            # 'val' -- confirm whether the validation list was intended.
            if step % global_validation_step == 0:
                validation_map_global = 0.
                validation_count = 0
                # evaluate by groups of batch_size images
                for _ in range(
                        int(len(dataset_manager.test_list) / batch_size) + 1):
                    batch_tx, batch_ty = dataset_manager.next_batch(
                        batch_size, 'val')
                    test_output = sess.run(pred,
                                           feed_dict={
                                               x: batch_tx,
                                               keep_var: 1
                                           })
                    MAP = mean_average_precision(test_output, batch_ty)
                    validation_map_global += MAP
                    validation_count += 1
                validation_map_global /= validation_count
                with open("logs/" + log_file_name, 'a') as log_file:
                    log_file.write(
                        "Iter {} Global Validation Accuracy = {:.4f} \n".
                        format(step, validation_map_global))
                # Keep the model whenever the validation score ties or beats
                # the best so far; the checkpoint path is fixed, so each save
                # replaces the previous one.
                if validation_map_global >= max_validation_map:
                    max_validation_map = validation_map_global
                    with open("logs/" + best_iteration_file_name,
                              'a') as log_file:
                        log_file.write("Iter {}  \n".format(step))
                    # Save model
                    saver.save(
                        sess,
                        "saved_models/model_dropout05_mean_square_error.ckpt")

            step += 1
        # print("Finish!")
        # Marker file signalling that the training loop has completed.
        with open("logs/finish", 'w') as finish_file:
            finish_file.write("Finish")