Example no. 1
def figure_ground_a(*args, **kwargs):
    dataset = cx.Dataset()
    url = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/figure_ground_a.npy"
    path = get_file("figure_ground_a.npy", origin=url)
    ds = np.load(path)
    ## [[[letter], [brim, body]], ...]
    letters = np.array([pair[0] for pair in ds])
    brims = np.array([pair[1][0] for pair in ds])
    bodies = np.array([pair[1][1] for pair in ds])
    dataset.name = "Figure-Ground A"
    dataset.description = """
This dataset (the so-called a-tabase) originates from Douglas
Hofstadter's research group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data (all the letter A) have been processed to make them neural
network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The brim and body parts have been identified manually. The dataset is
composed of letters on a 17 row x 9 column grid (4 unused rows at the
top and another 4 at the bottom of each letter were removed from the
original 25x9 letter images). The inputs are composed of the
full letter. The targets are composed of a picture of the body and
the brim.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    dataset.load_direct([letters], [brims, bodies])
    return dataset
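A minimal usage sketch (assuming conx and its dependencies are installed; the attribute names follow the cx.Dataset API used above):

ds = figure_ground_a()
print(ds.name)            # "Figure-Ground A"
print(len(ds.inputs))     # number of letter images in the a-tabase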
Example no. 2
def dataset_downloader():
    filename = get_file('qm9.tar.gz', DATASET_URL, extract=True,
                        cache_dir=DATA_PATH, cache_subdir=DATA_PATH)
    os.rename(DATA_PATH + 'gdb9.sdf', DATA_PATH + 'qm9.sdf')
    os.rename(DATA_PATH + 'gdb9.sdf.csv', DATA_PATH + 'qm9.sdf.csv')
    os.remove(DATA_PATH + 'qm9.tar.gz')
    return filename
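Note that the string concatenation above assumes DATA_PATH ends with a path separator. A sketch of the same post-processing with os.path.join, which drops that assumption (DATA_PATH and DATASET_URL are assumed to be defined at module level):

import os

# equivalent renames that do not depend on a trailing separator in DATA_PATH
os.rename(os.path.join(DATA_PATH, 'gdb9.sdf'), os.path.join(DATA_PATH, 'qm9.sdf'))
os.rename(os.path.join(DATA_PATH, 'gdb9.sdf.csv'), os.path.join(DATA_PATH, 'qm9.sdf.csv'))
os.remove(os.path.join(DATA_PATH, 'qm9.tar.gz'))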
Example no. 3
def gridfonts(*args, **kwargs):
    dataset = cx.Dataset()
    url = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/gridfonts.npy"
    path = get_file("gridfonts.npy", origin=url)
    ds = np.load(path)
    ## [letters, labels]
    letters = np.array([matrix for matrix in ds[0]])
    targets = np.array([matrix for matrix in ds[0]])
    labels = np.array([char for char in ds[1]], dtype=str)
    dataset.name = "Gridfonts"
    dataset.description = """
This dataset originates from Douglas Hofstadter's research
group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data have been processed to make them neural
network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The dataset is composed of letters on a 25 row x 9 column
grid. The inputs and targets are identical, and the labels
contain a string identifying the letter.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    dataset.load_direct([letters], [targets], [labels])
    return dataset
Example no. 4
    def _download_(self):

        challenges = {
            'CN': 'CBTest/data/cbtest_CN_cbt_{}.txt',
            'NE': 'CBTest/data/cbtest_N_{}.txt',
            'P': 'CBTest/data/cbtest_P_{}.txt',
            'V': 'CBTest/data/cbtest_V_{}.txt',
            # 'generic': 'CBTest/data/cbt_{}.txt',
        }

        path = get_file('CBTest.tar', origin=self.__URL__)

        with tarfile.open(path) as tar:

            challenge = challenges[self.__task__]
            train = 'train'
            valid = 'valid_2000ex'
            test = 'test_2500ex'

            ex_file = tar.extractfile(challenge.format(train))
            train_stories = self.__get_stories__(ex_file, only_supporting=self.__only_supporting__)

            ex_file = tar.extractfile(challenge.format(valid))
            train_stories += self.__get_stories__(ex_file, only_supporting=self.__only_supporting__)

            ex_file = tar.extractfile(challenge.format(test))
            test_stories = self.__get_stories__(ex_file, only_supporting=self.__only_supporting__)

        return train_stories, test_stories
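For reference, a sketch of the tar member names that challenge.format(...) produces, taking the 'NE' task as an example:

challenge = 'CBTest/data/cbtest_N_{}.txt'   # value for self.__task__ == 'NE'
challenge.format('train')                   # 'CBTest/data/cbtest_N_train.txt'
challenge.format('valid_2000ex')            # 'CBTest/data/cbtest_N_valid_2000ex.txt'
challenge.format('test_2500ex')             # 'CBTest/data/cbtest_N_test_2500ex.txt'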
Example no. 5
def load_cifar10():
    # download and extract data
    dirname = 'cifar-10-batches-py'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    path = get_file(dirname,
                    origin,
                    untar=True,
                    cache_dir='Z:\\',
                    cache_subdir="datasets")

    num_train_samples = 50000

    x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.zeros((num_train_samples, ), dtype='uint8')

    # load train data
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        data, labels = load_batch(fpath)
        x_train[(i - 1) * 10000:i * 10000, :, :, :] = data
        y_train[(i - 1) * 10000:i * 10000] = labels

    # load test data
    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if backend.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
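A quick sanity check of the returned arrays (a sketch; the shapes shown assume the 'channels_last' branch was taken):

(x_train, y_train), (x_test, y_test) = load_cifar10()
print(x_train.shape)   # (50000, 32, 32, 3)
print(y_train.shape)   # (50000, 1)
print(x_test.shape)    # (10000, 32, 32, 3)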
Example no. 6
    def __init__(self):
        logger.info('Loading Deeplab')

        local_path = os.path.join(config.WEIGHT_PATH, config.DEEPLAB_FILENAME)
        self.weights_path = get_file(os.path.abspath(local_path), config.DEEPLAB_URL, cache_subdir='models')

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.image_placeholder = tf.placeholder(tf.float32, shape=(None, None, None, 3))
            self.net = DeepLabResNetModel({'data': self.image_placeholder}, is_training=False,
                                          num_classes=self.NUM_CLASSES)

            restore_var = tf.global_variables()

            # Set up TF session and initialize variables.
            config_tf = tf.ConfigProto()
            config_tf.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config_tf)
            init = tf.global_variables_initializer()

            self.sess.run(init)

            # Load weights.
            loader = tf.train.Saver(var_list=restore_var)
            loader.restore(self.sess, self.weights_path)
Example no. 7
    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        This name is in the format mobilenet{rows}_{alpha} where rows is the
        imagenet shape dimension and 'alpha' controls the width of the network.
        For more info check the explanation from the keras mobilenet script itself.
        """

        alpha = float(self.backbone.split('_')[1])
        rows = int(self.backbone.split('_')[0].replace('mobilenet', ''))

        # load weights
        if keras.backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
        weights_url = mobilenet.BASE_WEIGHT_PATH + model_name
        weights_path = get_file(model_name, weights_url, cache_subdir='models')

        return weights_path
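To illustrate the parsing above with a hypothetical backbone string:

backbone = 'mobilenet224_0.75'                               # hypothetical value of self.backbone
alpha = float(backbone.split('_')[1])                        # 0.75
rows = int(backbone.split('_')[0].replace('mobilenet', ''))  # 224
# alpha_text becomes '7_5', so model_name is 'mobilenet_7_5_224_tf_no_top.h5'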
Example no. 8
def get_embeddings_index(
    embedding_name: str,
    url: str = None,
    embeddings_filename: str = None,
    parent_dir: str = None,
    cache_dir: str = None,
    embedding_dimensions: int = 300,
) -> Dict[str, ndarray]:
    """
    High level function for get an embedding index, usually from a public url, downloading and
    caching locally.

    :param embedding_name: the name of the embeddings, used to look up metadata values
    :param url: the URL where the embeddings may be found; this parameter overrides the baked in
    metadata paths
    :param embeddings_filename: the filename; usually appended onto the url
    :param parent_dir: where to store the files locally, if not specified then the keras cache
    directory will be used.
    :param cache_dir: where to store the files locally, if parent_dir is not specified then the
    keras cache directory will be used.
    :param embedding_dimensions: integer: 300 or 100, 50 etc
    :return: a dictionary of strings to ndarray of embedding values

    """
    file_template = ""
    if embedding_name in EMBEDDINGS_METADATA:
        url, file_template = EMBEDDINGS_METADATA[embedding_name]

    if not cache_dir and parent_dir:
        cache_dir = os.path.join(parent_dir, "data", embedding_name)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

    parts = urlparse(str(url))
    filename = parts.path.split("/")[-1]

    parent_path = Path(str(cache_dir))
    embeddings_dir = parent_path / "datasets"

    if embeddings_filename:
        embeddings_file = embeddings_filename
    else:
        if "{" in file_template and "}" in file_template:
            embeddings_file = file_template.format(embedding_dimensions)
        else:
            embeddings_file = file_template
    embed_file = embeddings_dir / embeddings_file
    if not embed_file.exists():  # if not exists, fetch
        LOG.info("initializing, please wait.")
        data_archive = get_file(fname=filename,
                                origin=url,
                                cache_dir=cache_dir,
                                untar=False,
                                extract=True)  # pylint: disable=unused-variable
        LOG.info("Done initializing")
        if not data_archive:
            LOG.warning("Failed to fetch %s", url)
    embeddings_index = load_embeddings(
        str(embed_file), embedding_dimensions=embedding_dimensions)
    return embeddings_index
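A hedged usage sketch; the embedding name and dimensions are assumptions and must either appear in EMBEDDINGS_METADATA or be supplied via url:

index = get_embeddings_index('glove', embedding_dimensions=100)  # hypothetical name
vector = index.get('word')   # ndarray of shape (100,), or None if 'word' is absent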
Example no. 9
def load_model_weights(weights_collection, model, dataset, classes,
                       include_top):
    weights = find_weights(weights_collection, model.name, dataset,
                           include_top)

    if weights:
        weights = weights[0]

        if include_top and weights['classes'] != classes:
            raise ValueError('If using `weights` and `include_top`'
                             ' as true, `classes` should be {}'.format(
                                 weights['classes']))

        weights_path = get_file(weights['name'],
                                weights['url'],
                                cache_subdir='models',
                                md5_hash=weights['md5'])

        model.load_weights(weights_path)

    else:
        raise ValueError(
            'There are no weights for this configuration: ' +
            'model = {}, dataset = {}, '.format(model.name, dataset) +
            'classes = {}, include_top = {}.'.format(classes, include_top))
Example no. 10
    def __init__(self):
        logger.info('Loading Tensorflow Detection API')

        weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                                cache_dir=os.path.abspath(config.WEIGHT_PATH),
                                cache_subdir='models')

        extract_path = weights_path.replace('.tar.gz', '')
        if not os.path.exists(extract_path):
            tar = tarfile.open(weights_path, "r:gz")
            tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
            tar.close()
        pb_path = os.path.join(extract_path, self.PB_NAME)

        self.graph = tf.Graph()
        with self.graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(pb_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        self.categories = label_map_util.convert_label_map_to_categories(self.label_map,
                                                                         max_num_classes=self.NUM_CLASSES,
                                                                         use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)
Example no. 11
def load_data():
    """This function downloads, extracts, loads, normalizes and one-hot encodes Flower Photos dataset"""
    # download the dataset and extract it
    data_dir = get_file(origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
                                         fname='flower_photos', untar=True)
    data_dir = pathlib.Path(data_dir)

    # count how many images there are
    image_count = len(list(data_dir.glob('*/*.jpg')))
    print("Number of images:", image_count)

    # get all classes for this dataset (types of flowers) excluding LICENSE file
    CLASS_NAMES = np.array([item.name for item in data_dir.glob('*') if item.name != "LICENSE.txt"])

    # roses = list(data_dir.glob('roses/*'))
    # 20% validation set 80% training set
    image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2)

    # make the training dataset generator
    train_data_gen = image_generator.flow_from_directory(directory=str(data_dir), batch_size=batch_size,
                                                        classes=list(CLASS_NAMES), target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
                                                        shuffle=True, subset="training")
    # make the validation dataset generator
    test_data_gen = image_generator.flow_from_directory(directory=str(data_dir), batch_size=batch_size,
                                                        classes=list(CLASS_NAMES), target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
                                                        shuffle=True, subset="validation")

    return train_data_gen, test_data_gen, CLASS_NAMES
Example no. 12
def load_data(width=128, height=128):
    """
    Load CALTECH256 dataset, resize and cache for future load
    :param width: Width to resize
    :param height: Height to resize
    :return: Image data and labels
    """

    img_res = str(width) + 'x' + str(height)
    try:
        image_data = np.load(file='caltech256_' + img_res + '.images.npy')
        image_label = np.load(file='caltech256_' + img_res + '.labels.npy')
        image_data, image_label = shuffle(image_data, image_label, random_state=0)
        return image_data, image_label
    except IOError:
        print('Cached images not found! Loading images...')

    dirname = '256_ObjectCategories'
    origin = 'http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar'
    path = get_file(dirname, origin=origin, untar=True)

    num_samples = 30607
    image_data = np.empty((num_samples, width, height, 3), dtype='uint8')
    image_label = np.empty((num_samples,), dtype='uint16')  # category ids run 1-257, too large for uint8

    category_list = os.listdir(path)
    sample = 0
    for i, category in enumerate(category_list):

        # Progress bar
        print('\r', end='', flush=True)
        progress = round(i/len(category_list)*100)
        print("[%-100s] %3d%%" % ('#'*progress, progress), end='', flush=True)

        category_path = path + '/' + category + '/'
        image_list = os.listdir(category_path)

        for image in image_list:
            if image.lower().endswith('.jpg'):
                img = load_img(category_path + image)
                img = img_to_array(img)
                img = resize(img, (width, height))

                image_data[sample, :, :, :] = img
                image_label[sample] = int(category.split('.')[0])
                sample += 1

    print(flush=True)

    image_data = image_data[:sample, :, :, :]
    image_label = image_label[:sample]

    # Save images and labels to cache file
    np.save(file='caltech256_' + img_res + '.images', arr=image_data)
    np.save(file='caltech256_' + img_res + '.labels', arr=image_label)

    image_data, image_label = shuffle(image_data, image_label, random_state=0)

    return image_data, image_label
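Typical usage (a sketch): the first call downloads and resizes the images; later calls load the .npy cache files written at the end of the function:

image_data, image_label = load_data(width=128, height=128)
print(image_data.shape)   # (num_images, 128, 128, 3)
print(image_label[:5])    # integer category ids parsed from directory names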
Example no. 13
    def load_pretrained_model(self):
        cache_subdir = 'pretrained_models'

        prefix = 'https://fancy-nlp-1253403094.cos.ap-shanghai.myqcloud.com/pretrained_models/'

        preprocessor_file = get_file(
            fname='msra_ner_bilstm_cnn_crf_preprocessor.pkl',
            origin=prefix + 'msra_ner_bilstm_cnn_crf_preprocessor.pkl',
            cache_subdir=cache_subdir)
        json_file = get_file(fname='msra_ner_bilstm_cnn_crf.json',
                             origin=prefix + 'msra_ner_bilstm_cnn_crf.json',
                             cache_subdir=cache_subdir)
        weights_file = get_file(fname='msra_ner_bilstm_cnn_crf.hdf5',
                                origin=prefix + 'msra_ner_bilstm_cnn_crf.hdf5',
                                cache_subdir=cache_subdir)

        self.load(preprocessor_file, json_file, weights_file)
Example no. 14
 def _local_model(self, remote_location, model_path):
     """
     Builds a local model
     :param remote_location: where to get the h5 file
     :param model_path: Where to save the files, defaults to settings['PIPOTTER_MODEL_DIRECTORY'] 
     """
     logger.debug("Loading model file")
     model_file = get_file(fname=H5,
                           origin="{}/{}".format(remote_location, H5),
                           cache_dir=model_path)
     self.model = load_model(model_file)
     class_file = get_file(fname=CLASSES,
                           origin="{}/{}".format(remote_location, CLASSES),
                           cache_dir=model_path)
     logger.debug("Loading class file")
     with open(class_file, 'r') as jfile:
         self.classes = loads(jfile.read())
Example no. 15
    def load_pretrained_model(self):
        cache_subdir = 'pretrained_models'

        preprocessor_file = get_file(
            fname='toutiao_text_classification_cnn_preprocessor.pkl',
            origin=MODEL_STORAGE_PREFIX + 'toutiao_text_classification_cnn_preprocessor.pkl',
            cache_subdir=cache_subdir, cache_dir=CACHE_DIR)
        json_file = get_file(
            fname='toutiao_text_classification_cnn.json',
            origin=MODEL_STORAGE_PREFIX + 'toutiao_text_classification_cnn.json',
            cache_subdir=cache_subdir, cache_dir=CACHE_DIR)
        weights_file = get_file(
            fname='toutiao_text_classification_cnn.hdf5',
            origin=MODEL_STORAGE_PREFIX + 'toutiao_text_classification_cnn.hdf5',
            cache_subdir=cache_subdir, cache_dir=CACHE_DIR)

        self.load(preprocessor_file, json_file, weights_file)
Example no. 16
 def __init__(self):
     """
     :param predictor_model_path: path to shape_predictor_68_face_landmarks.dat file
     """
     predictor_model_path = unpack_bz2(get_file('shape_predictor_68_face_landmarks.dat.bz2',
                                                LANDMARKS_MODEL_URL, cache_subdir='temp'))
     self.detector = dlib.get_frontal_face_detector() # cnn_face_detection_model_v1 also can be used
     self.shape_predictor = dlib.shape_predictor(predictor_model_path)
Example no. 17
def download_imagenet(backbone):
    validate_backbone(backbone)

    weights_path = get_file(
        'squeezenet_weights_tf_dim_ordering_tf_kernels_notop.h5',
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models')
    return weights_path
Example no. 18
def pic_cut(path1, path2):
    landmarks_model_path = unpack_bz2(
        get_file('shape_predictor_68_face_landmarks.dat.bz2',
                 LANDMARKS_MODEL_URL,
                 cache_subdir='temp'))
    landmarks_detector = LandmarksDetector(landmarks_model_path)
    face_landmarks = landmarks_detector.get_landmarks(path1)
    image_align(path1, path2, face_landmarks)
Example no. 19
    def initModel(self, dataset_name):
        OS = 8
        assert dataset_name in [
            'CDnet', 'SBI', 'UCSD'
        ], 'dataset_name must be one of ["CDnet", "SBI", "UCSD"]'
        assert len(self.img_shape) == 3
        h, w, d = self.img_shape
        img_input = Input(shape=(h, w, d), name='img_input')
        x, skip1 = mobilenetV2(img_input)
        b4 = AveragePooling2D(pool_size=(int(np.ceil(h / OS)),
                                         int(np.ceil(w / OS))))(x)

        b4 = Conv2D(256, (1, 1),
                    padding='same',
                    use_bias=False,
                    name='image_pooling')(b4)
        b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
        b4 = Activation('relu')(b4)

        b4 = Lambda(lambda x: tf.image.resize_bilinear(
            x, size=(int(np.ceil(h / OS)), int(np.ceil(w / OS)))))(b4)

        # simple 1x1
        b0 = Conv2D(256, (1, 1), padding='same', use_bias=False,
                    name='aspp0')(x)
        b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
        b0 = Activation('relu', name='aspp0_activation')(b0)
        x = Concatenate()([b4, b0])
        x = Conv2D(256, (1, 1),
                   padding='same',
                   use_bias=False,
                   name='concat_projection')(x)
        x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
        x = Activation('relu')(x)
        x = Dropout(0.1)(x)

        x = Conv2D(1, (1, 1), padding='same', name='custom_logits_semantic')(x)
        x = Lambda(lambda x: tf.image.resize_bilinear(x, size=(h, w)))(x)
        x = Activation('sigmoid')(x)

        model = Model(img_input, x, name='DeepLab3+')

        # opt = keras.optimizers.RMSprop(lr=self.lr, rho=0.9, epsilon=1e-08, decay=0.)

        # Since UCSD has no void label, we do not need to filter it out
        if dataset_name == 'UCSD':
            c_loss = loss2
            c_acc = acc2
        else:
            c_loss = loss
            c_acc = acc
        weights_path = get_file(
            'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
            self.mobile_weights_path,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
        model.compile(loss=c_loss, optimizer=Adam(self.lr), metrics=[c_acc])
        return model
Example no. 20
def download_files(files, target_dir, url):
    """Download files by URLS, function get_file use TensorFlow"""
    data_paths = []
    for file in files:
        path = target_dir + file
        data_path = get_file(file, url + file) if not os.path.exists(path) else path
        data_paths.append(path)
    print('All files downloaded')
    return data_paths
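A usage sketch with hypothetical arguments (note that target_dir must end with a path separator):

files = ['train.csv', 'test.csv']                 # hypothetical file names
paths = download_files(files, 'data/', 'https://example.com/dataset/')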
Example no. 21
def load_PSPNet():
    random.seed(0)
    class_colors = [(random.randint(0,255),random.randint(0,255),random.randint(0,255)) for _ in range(5000)]
    model_url = "https://getfile.dokpub.com/yandex/get/https://yadi.sk/d/BR1EAlZ-UQMzQQ"
    model_config = get_file('PSP-Net.h5', model_url)
    model = load_model(model_config, custom_objects={'Interp': Interp})
    global graph
    graph = ktf.get_default_graph()
    return model, class_colors
Example no. 22
 def save(self, url):
     """
     save pretrained models
     """
     unzipped = _os.path.join(self.file_path, self.model_name)
     path = _utils.get_file(self.model_name + ".gz", url)
     with open(unzipped, "wb") as fout:
         zcat = _subprocess.Popen(["zcat"], stdin=open(path), stdout=fout)
         zcat.wait()
Example no. 23
def pspnet_50_ADE_20K_SUNRGB(height=473, width=473):

    model_url = "https://www.dropbox.com/s/" \
                "0uxn14y26jcui4v/pspnet50_ade20k.h5?dl=1"
    latest_weights = get_file("pspnet50_ade20k.h5", model_url)

    model = pspnet_50_sunrgb(input_height=height, input_width=width)
    model.load_weights(latest_weights)
    return model
Example no. 24
def _download_data():
    _ = get_file('qm9.tar.gz',
                 DATASET_URL,
                 extract=True,
                 cache_dir=DATA_PATH,
                 cache_subdir=DATA_PATH)
    os.rename(DATA_PATH + 'gdb9.sdf', DATA_PATH + 'qm9.sdf')
    os.rename(DATA_PATH + 'gdb9.sdf.csv', DATA_PATH + 'qm9.sdf.csv')
    os.remove(DATA_PATH + 'qm9.tar.gz')
Example no. 25
    def __init__(self, args, batch_size=1, perc_model=None, sess=None):
        self.sess = tf.get_default_session() if sess is None else sess
        K.set_session(self.sess)
        self.epsilon = 0.00000001
        self.lr = args.lr
        self.decay_rate = args.decay_rate
        self.decay_steps = args.decay_steps
        self.img_size = args.image_size
        self.layer = args.use_vgg_layer
        self.vgg_loss = args.use_vgg_loss
        self.face_mask = args.face_mask
        self.use_grabcut = args.use_grabcut
        self.scale_mask = args.scale_mask
        self.mask_dir = args.mask_dir
        if (self.layer <= 0 or self.vgg_loss <= self.epsilon):
            self.vgg_loss = None
        self.pixel_loss = args.use_pixel_loss
        if (self.pixel_loss <= self.epsilon):
            self.pixel_loss = None
        self.mssim_loss = args.use_mssim_loss
        if (self.mssim_loss <= self.epsilon):
            self.mssim_loss = None
        self.lpips_loss = args.use_lpips_loss
        if (self.lpips_loss <= self.epsilon):
            self.lpips_loss = None
        self.l1_penalty = args.use_l1_penalty
        if (self.l1_penalty <= self.epsilon):
            self.l1_penalty = None
        self.adaptive_loss = args.use_adaptive_loss
        self.sharpen_input = args.sharpen_input
        self.batch_size = batch_size
        if perc_model is not None and self.lpips_loss is not None:
            self.perc_model = perc_model
        else:
            self.perc_model = None
        self.ref_img = None
        self.ref_weight = None
        self.perceptual_model = None
        self.ref_img_features = None
        self.features_weight = None
        self.loss = None
        self.discriminator_loss = args.use_discriminator_loss
        if (self.discriminator_loss <= self.epsilon):
            self.discriminator_loss = None
        if self.discriminator_loss is not None:
            self.discriminator = None
            self.stub = create_stub(batch_size)

        if self.face_mask:
            import dlib
            self.detector = dlib.get_frontal_face_detector()
            LANDMARKS_MODEL_URL = 'http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2'
            landmarks_model_path = unpack_bz2(
                get_file('shape_predictor_68_face_landmarks.dat.bz2',
                         LANDMARKS_MODEL_URL,
                         cache_subdir='temp'))
            self.predictor = dlib.shape_predictor(landmarks_model_path)
Example no. 26
def coco_download(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths):
    zip_paths = coco_files(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths)
    for url, filename, md5 in zip(urls, filenames, md5s):
        path = get_file(filename, url, md5_hash=md5, extract=True, cache_subdir=dataset_path)
        # TODO(ahundt) check if it is already extracted, don't re-extract. see
        # https://github.com/fchollet/keras/issues/5861
        zip_file = zipfile.ZipFile(path, 'r')
        zip_file.extractall(path=dataset_path)
        zip_file.close()
Example no. 27
def test_get_file():
    drusen_model = get_file(
        'drusen_model.h5',
        deepseenet.deepseenet_drusen.DRUSEN_PATH,
        cache_dir='models',
        md5_hash=deepseenet.deepseenet_drusen.DRUSEN_MD5
    )
    print(drusen_model)
    assert os.path.exists(drusen_model)
Example no. 28
def load_dataset_npz(path, url):
    """loads an .npz file of saved image data, and returns the images and their
    associated labels as numpy arrays
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    f = np.load(path, allow_pickle=True)
    images, labels = f['data'], f['labels']
    return images, labels
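A usage sketch; the file name and URL are hypothetical placeholders:

images, labels = load_dataset_npz(
    'faces.npz',                               # hypothetical local file name
    'https://example.com/datasets/faces.npz')  # hypothetical origin URL
print(images.shape, labels.shape)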
Example no. 29
def loadGenderModel():
    # Source of the model
    source = "https://s3.ap-south-1.amazonaws.com/arunponnusamy/pre-trained-weights/gender_detection.model"
    path = get_file("gender_detection.model",
                    source,
                    cache_subdir="pre-trained",
                    cache_dir=os.getcwd())
    # Load and return the model
    return load_model(path)
Example no. 30
    def __init__(self):
        mloc = get_file(
            "dlib_face_recognition_resnet_model_v1.dat",
            origin="https://github.com/distant-viewing/dvt/"
            "releases/download/0.0.1/"
            "dlib_face_recognition_resnet_model_v1.dat",
        )
        self.dlib = importlib.import_module("dlib")

        self.encode = self.dlib.face_recognition_model_v1(mloc)

        mloc = get_file(
            "shape_predictor_5_face_landmarks.dat",
            origin="https://github.com/distant-viewing/dvt/"
            "releases/download/0.0.1/"
            "shape_predictor_5_face_landmarks.dat",
        )
        self.pose = self.dlib.shape_predictor(mloc)
Example no. 31
def pascal_voc_download(dataset_path, filenames, dataset_root, urls, md5s):
    zip_paths = pascal_voc_files(dataset_path, filenames, dataset_root, urls,
                                 md5s)
    for url, filename, md5 in zip(urls, filenames, md5s):
        path = get_file(filename,
                        url,
                        md5_hash=md5,
                        extract=True,
                        cache_subdir=dataset_path)
Example no. 32
def load_dataset_npz(path, url):
    """loads an .npz file of saved image data, and returns the images and their
    associated labels as numpy arrays
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    f = np.load(path)
    images, labels = f['data'], f['labels']
    return images, labels
Example no. 33
def load_dataset_npz(path, url):
    """loads a normed face dataset file and returns a numpy array of shape
    (num, vector_size) with dtype float32, and an array of label strings
    """
    from keras.utils import get_file
    path = get_file(path, origin=url)
    f = np.load(path)
    images, labels = f['data'], f['labels']
    return images, labels
Example no. 34
def get_glove_embeddings(embeddings):
    keras_dataset_dir = expanduser('~/.keras/datasets/')
    glove_zip_file_url = 'http://nlp.stanford.edu/data/glove.840B.300d.zip'
    glove_zip_file = 'glove.840B.300d.zip'
    glove_file = 'glove.840B.300d.txt'
    if not exists(keras_dataset_dir + glove_zip_file):
        zf = ZipFile(get_file(glove_zip_file, glove_zip_file_url))
        zf.extract(glove_file, path=keras_dataset_dir)

    print("Processing", glove_file)
Example no. 35
def colors(*args, path='colors.csv',
           url="https://raw.githubusercontent.com/Calysto/conx-data/master/colors/colors.csv",
           **kwargs):
    dataset = cx.Dataset()
    from keras.utils import get_file
    path = get_file(path, origin=url)
    fp = open(path, "r")
    reader = csv.reader(fp)
    inputs = []
    labels = []
    targets = []
    count = 1
    for line in reader:
        name, r, g, b = line
        if name == "name": continue # first line is header
        inputs.append([float(int(r)/255), float(int(g)/255), float(int(b)/255)])
        targets.append([count])
        labels.append(name)
        count += 1
    inputs = np.array(inputs, dtype='float32')
    targets = np.array(targets, dtype='uint16')
    dataset.name = "Colors"
    dataset.description = """
Original source: https://github.com/andrewortman/colorbot

This dataset also includes some colors that were ignored in the original data.

Inspired by:

* http://aiweirdness.com/tagged/paint-colors

When initially loaded, this database has the following format:

* labels: [color_name_string, ...] # order matches target
* inputs: [[red, green, blue], ...] # scaled between 0 and 1
* targets: [[int], ...] # number of label

For example:

```
>>> import conx as cx
>>> ds = cx.Dataset.get("colors")
>>> ds.labels[0], ds.inputs[0], ds.targets[0]
('tidewater',
 [0.7686274647712708, 0.843137264251709, 0.8352941274642944],
 [1])
```
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
Example no. 36
def resnet_50(input_shape):
    img_input = Input(input_shape)
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
    if input_shape[-1] > 3:
        x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1_changed')(img_input)
    x = BatchNormalization(name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    print("Loading pretrained weights for Resnet50...")
    weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                            resnet50_padding.WEIGHTS_PATH_NO_TOP,
                            cache_subdir='models',
                            md5_hash='a268eb855778b3df3c7506639542a6af')
    model = Model(img_input, x)
    model.load_weights(weights_path, by_name=True)
    if input_shape[-1] > 3:
        print("Loading weights for conv1 layer separately for the first 3 channels")
        conv1_weights = np.zeros((7, 7, input_shape[-1], 64), dtype="float32")
        resnet_ori = ResNet50(include_top=False, input_shape=(224, 224, 3))
        conv1_weights[:, :, :3, :] = resnet_ori.get_layer("conv1").get_weights()[0][:, :, :, :]
        # random init
        conv1_weights[:, :, 3:, :] = model.get_layer('conv1_changed').get_weights()[0][:, :, 3:, :]
        bias = resnet_ori.get_layer("conv1").get_weights()[1]
        model.get_layer('conv1_changed').set_weights((conv1_weights, bias))
        model.get_layer('conv1_changed').name = 'conv1'

    return model
Example no. 37
    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop
        where backbone is the densenet + number of layers (e.g. densenet121).
        For more info check the explanation from the keras densenet script itself:
            https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py
        """
        origin    = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/'
        file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'

        # load weights
        if keras.backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format are not available.')

        weights_url = origin + file_name.format(self.backbone)
        return get_file(file_name.format(self.backbone), weights_url, cache_subdir='models')
Example no. 38
def mnist_h5(*args, **kwargs):
    """
    Load the Keras MNIST dataset from an H5 file.
    """
    import h5py

    path = "mnist.h5"
    url = "https://raw.githubusercontent.com/Calysto/conx-data/master/mnist/mnist.h5"
    path = get_file(path, origin=url)
    h5 = h5py.File(path, "r")
    dataset = cx.Dataset()
    dataset._inputs = h5["inputs"]
    dataset._targets = h5["targets"]
    dataset._labels = h5["labels"]
    dataset.h5 = h5
    dataset.name = "MNIST-H5"
    dataset.description = description
    dataset._cache_values()
    return dataset
Example no. 39
    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.
        Weights can be downloaded at https://github.com/fizyr/keras-models/releases .
        """
        if self.backbone == 'vgg16':
            resource = keras.applications.vgg16.WEIGHTS_PATH_NO_TOP
            checksum = '6d6bbae143d832006294945121d1f1fc'
        elif self.backbone == 'vgg19':
            resource = keras.applications.vgg19.WEIGHTS_PATH_NO_TOP
            checksum = '253f8cb515780f3b799900260a226db6'
        else:
            raise ValueError("Backbone '{}' not recognized.".format(self.backbone))

        return get_file(
            '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone),
            resource,
            cache_subdir='models',
            file_hash=checksum
        )
Example no. 40
    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.
        """
        resnet_filename = 'ResNet-{}-model.keras.h5'
        resnet_resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(resnet_filename)
        depth = int(self.backbone.replace('resnet', ''))

        filename = resnet_filename.format(depth)
        resource = resnet_resource.format(depth)
        if depth == 50:
            checksum = '3e9f4e4f77bbe2c9bec13b53ee1c2319'
        elif depth == 101:
            checksum = '05dc86924389e5b401a9ea0348a3213c'
        elif depth == 152:
            checksum = '6ee11ef2b135592f8031058820bb9e71'
        else:
            raise ValueError("Backbone '{}' not recognized.".format(self.backbone))

        return get_file(
            filename,
            resource,
            cache_subdir='models',
            md5_hash=checksum
        )
Example no. 41
def SqueezeNet(input_tensor=None, input_shape=None,
               weights='imagenet',
               classes=1000):

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')


    input_shape = _obtain_input_shape(input_shape,
                                      default_size=227,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      include_top=True)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor


    x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
    x = Activation('relu', name='relu_conv1')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

    x = fire_module(x, fire_id=2, squeeze=16, expand=64)
    x = fire_module(x, fire_id=3, squeeze=16, expand=64)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

    x = fire_module(x, fire_id=4, squeeze=32, expand=128)
    x = fire_module(x, fire_id=5, squeeze=32, expand=128)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

    x = fire_module(x, fire_id=6, squeeze=48, expand=192)
    x = fire_module(x, fire_id=7, squeeze=48, expand=192)
    x = fire_module(x, fire_id=8, squeeze=64, expand=256)
    x = fire_module(x, fire_id=9, squeeze=64, expand=256)
    x = Dropout(0.5, name='drop9')(x)

    x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x)
    x = Activation('relu', name='relu_conv10')(x)
    x = GlobalAveragePooling2D()(x)
    out = Activation('softmax', name='loss')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, out, name='squeezenet')

    # load weights
    if weights == 'imagenet':

        weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH,
                                    cache_subdir='models')
        model.load_weights(weights_path)

        if K.image_data_format() == 'channels_first':

            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')
    return model
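A minimal usage sketch, assuming fire_module, WEIGHTS_PATH, and the keras imports are defined elsewhere in the module as this example implies:

import numpy as np

model = SqueezeNet(weights='imagenet')           # expects 227x227 RGB input by default
dummy = np.random.rand(1, 227, 227, 3).astype('float32')
preds = model.predict(dummy)                     # (1, 1000) softmax probabilities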