def _build_name_to_model(self):
    URL = ('https://github.com/oarriaga/altamira-data/'
           'releases/download/v0.13/')
    name_to_weights = {
        '035_power_drill': 'UNET-VGG16_POWERDRILL_weights.hdf5',
        '051_large_clamp': 'UNET-VGG16_LARGE-CLAMP_weights.hdf5',
        '037_scissors': 'UNET-VGG16_SCISSORS_weights.hdf5'}
    name_to_model = {}
    for object_name, weights_name in name_to_weights.items():
        model = UNET_VGG16(3, (128, 128, 3))
        weights_path = get_file(weights_name, URL + weights_name,
                                cache_subdir='paz/models')
        model.load_weights(weights_path)
        name_to_model[object_name] = model
    return name_to_model
def download(self):
    get_file('qm9.tar.gz', self.url, extract=True,
             cache_dir=self.path, cache_subdir=self.path)
    os.remove(osp.join(self.path, 'qm9.tar.gz'))
def load_data(data_set_key: str,
              a2e_data_path: str = '../../../a2e-data/data',
              cache_dir: str = None) -> BearingDataSet:
    """Loads one of the bearing datasets.

    Parameters
    ----------
    data_set_key: str
        One of the available dataset keys `400rpm`, `800rpm`, `1200rpm`, `variable_rpm`
    a2e_data_path: str
        Local file path to the a2e-data repository
    cache_dir: str
        Optional cache directory for the datasets, defaults to `~/.a2e/` or `/tmp/.a2e/` as a fallback

    Returns
    -------
    BearingDataSet: A bearing data set object
    """
    if a2e_data_path is not None and not a2e_data_path.startswith('http') \
            and not a2e_data_path.startswith('file://'):
        if os.path.isabs(a2e_data_path):
            a2e_data_path = 'file://' + os.path.abspath(a2e_data_path)
        else:
            bearing_module_path = pathlib.Path(__file__).parent.absolute()
            absolute_data_path = os.path.abspath(
                os.path.join(bearing_module_path, a2e_data_path))

            if os.name == 'nt':
                absolute_data_path = f'/{absolute_data_path}'.replace('\\', '/')

            a2e_data_path = 'file://' + absolute_data_path

    if not os.path.isdir(a2e_data_path.replace('file://', '')):
        a2e_data_path = 'https://github.com/maechler/a2e-data/raw/master/data/'

    if cache_dir is None:
        cache_dir = os.path.join(Path.home(), '.a2e')

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    a2e_data_path = a2e_data_path.rstrip('/') + '/'
    data_set_description_origin = f'{a2e_data_path}{data_set_key}.yaml'
    data_set_origin = f'{a2e_data_path}{data_set_key}.csv.gz'
    data_set_description_path = get_file(data_set_key + '.yaml',
                                         origin=data_set_description_origin,
                                         cache_dir=cache_dir,
                                         cache_subdir='datasets/bearing')
    windows = {}

    with open(data_set_description_path) as data_set_description_file:
        data_set_description = yaml.load(data_set_description_file,
                                         Loader=yaml.FullLoader)
        data_set_path = get_file(data_set_key + '.csv.gz',
                                 origin=data_set_origin,
                                 cache_dir=cache_dir,
                                 cache_subdir='datasets/bearing',
                                 file_hash=data_set_description['data']['md5_hash'],
                                 hash_algorithm='md5')

        with gzip.open(data_set_path, mode='rt') as data_set_file:
            data_frame = pd.read_csv(
                data_set_file,
                parse_dates=[data_set_description['data']['index_column']],
                date_parser=lambda x: timestamp_to_date_time(float(x)),
                quotechar='"',
                sep=',')
            data_frame = data_frame.set_index(
                data_set_description['data']['index_column'])

            for window_key, window_description in data_set_description['windows'].items():
                windows[window_key] = {
                    'mask': (data_frame.index > window_description['start'])
                            & (data_frame.index <= window_description['end']),
                    'label': window_description['label'],
                }

    return BearingDataSet(data_set_key, data_frame, windows)
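
# A minimal usage sketch for the bearing loader above, assuming only what
# its docstring states: '400rpm' is a valid dataset key, and the loader
# falls back to the remote a2e-data repository when no local checkout is
# found at the default relative path.
bearing_data_set = load_data('400rpm')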
def decode_predictions(preds, top=5):
    LABELS = None
    if len(preds.shape) == 2:
        if preds.shape[1] == 2622:
            fpath = get_file('rcmalli_vggface_labels_v1.npy',
                             V1_LABELS_PATH,
                             cache_subdir=VGGFACE_DIR)
            LABELS = np.load(fpath)
        elif preds.shape[1] == 8631:
            fpath = get_file('rcmalli_vggface_labels_v2.npy',
                             V2_LABELS_PATH,
                             cache_subdir=VGGFACE_DIR)
            LABELS = np.load(fpath)
        else:
            raise ValueError('`decode_predictions` expects '
                             'a batch of predictions '
                             '(i.e. a 2D array of shape (samples, 2622)) for V1 or '
                             '(samples, 8631) for V2. '
                             'Found array with shape: ' + str(preds.shape))
    else:
        raise ValueError('`decode_predictions` expects '
                         'a batch of predictions '
                         '(i.e. a 2D array of shape (samples, 2622)) for V1 or '
                         '(samples, 8631) for V2. '
                         'Found array with shape: ' + str(preds.shape))
    results = []
    for pred in preds:
        top_indices = pred.argsort()[-top:][::-1]
        result = [[str(LABELS[i].encode('utf8')), pred[i]] for i in top_indices]
        result.sort(key=lambda x: x[1], reverse=True)
        results.append(result)
    return results
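
# A self-contained sketch exercising decode_predictions above with random
# scores; 8631 columns selects the V2 labels branch (the labels file is
# downloaded on first use). A real caller would pass the softmax output
# of a VGGFace model instead.
import numpy as np

dummy_preds = np.random.rand(2, 8631)
dummy_preds /= dummy_preds.sum(axis=1, keepdims=True)
for label, score in decode_predictions(dummy_preds, top=3)[0]:
    print(label, score)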
def download(self): get_file( "qm7b.mat", self.url, extract=True, cache_dir=self.path, cache_subdir=self.path, )
def download(self): get_file( "qm9.tar.gz", self.url, extract=True, cache_dir=self.path, cache_subdir=self.path, ) os.remove(osp.join(self.path, "qm9.tar.gz"))
def main(args):
    import os

    import cv2 as cv
    from tqdm import tqdm
    from tensorflow.keras import utils

    datatype = args.datatype
    dataset = args.dataset

    if datatype == 'paired':
        URL = f"https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/{dataset}.tar.gz"

        print(f"Start downloading the {dataset} dataset!")
        path_to_zip = utils.get_file(f"{dataset}.tar",
                                     origin=URL,
                                     extract=True,
                                     cache_dir='./')
        print("Downloading done!")

        PATH = os.path.join(os.path.dirname(path_to_zip), dataset)

        for path, subdir, files in os.walk(PATH):
            if "domain_A" in subdir or not len(files) or "domain" in path:
                continue

            data = path.split('/')[-1]
            print(f"Processing the {data} directory!")

            # os.makedirs(os.path.join(path, "domain_A"), exist_ok=True)
            # os.makedirs(os.path.join(path, "domain_B"), exist_ok=True)
            os.makedirs(os.path.join(PATH, f"{data}A"), exist_ok=True)
            os.makedirs(os.path.join(PATH, f"{data}B"), exist_ok=True)

            for file in tqdm(files):
                if file.split('.')[-1].lower() not in [
                        'jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif'
                ]:
                    continue

                img_path = os.path.join(path, file)
                img = cv.imread(img_path)
                h, w, c = img.shape

                # Paired samples are stored side by side; split them down
                # the middle into the A and B domains.
                # cv.imwrite(os.path.join(path, "domain_A", file), img[:, :w//2])
                # cv.imwrite(os.path.join(path, "domain_B", file), img[:, w//2:])
                cv.imwrite(os.path.join(PATH, f"{data}A", file), img[:, :w // 2])
                cv.imwrite(os.path.join(PATH, f"{data}B", file), img[:, w // 2:])
                os.remove(img_path)

            os.removedirs(path)
    else:
        URL = f"https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{dataset}.zip"

        print(f"Start downloading the {dataset} dataset!")
        # The CycleGAN archive is a zip; get_file's default 'auto' archive
        # detection inspects the file contents, so the `.tar` cache name
        # still extracts correctly.
        path_to_zip = utils.get_file(f"{dataset}.tar",
                                     origin=URL,
                                     extract=True,
                                     cache_dir='./')
        print("Downloading done!")
def load_data(path="svhn_matlab.npz", type="normal"): """Loads the SVHN dataset. # Arguments path: path where to cache the dataset locally (relative to ~/.keras/datasets). type: any of normal, extra (extra appends ~530K extra images for training) # Returns Tuple of Numpy arrays: `(input_train, target_train), (input_test, target_test)`. """ # Log about loading logging.basicConfig(level=logging.INFO) logging.info('Loading dataset = svhn') # Load data path_train = get_file(f"{path}_train", origin=("http://ufldl.stanford.edu/housenumbers/" "train_32x32.mat")) path_test = get_file(f"{path}_test", origin=("http://ufldl.stanford.edu/housenumbers/" "test_32x32.mat")) # Load data from Matlab file. # Source: https://stackoverflow.com/a/53547262 mat_train = sio.loadmat(path_train) mat_test = sio.loadmat(path_test) # Prepare training data input_train = mat_train["X"] input_train = np.rollaxis(input_train, 3, 0) target_train = mat_train["y"].flatten() # Prepare testing data input_test = mat_test["X"] input_test = np.rollaxis(input_test, 3, 0) target_test = mat_test["y"].flatten() # Append extra data, if required if type == "extra": path_extra = get_file( f"{path}_extra", origin="http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", ) mat_extra = sio.loadmat(path_extra) input_extra = mat_extra["X"] input_extra = np.rollaxis(input_extra, 3, 0) target_extra = mat_extra["y"].flatten() input_train = np.concatenate((input_extra, input_train)) target_train = np.concatenate((target_extra, target_train)) # Warn about citation warn_citation() # Return data return (input_train, target_train), (input_test, target_test)
def download_file(raw_paths, urls):
    last_except = None
    for file_name, url in zip(raw_paths, urls):
        try:
            get_file(file_name, origin=url)
        except Exception as e:
            last_except = e
    if last_except is not None:
        raise last_except
def download_weights(weights_links):
    """Download the weights to the path given in the `weights_links` dict.

    :param weights_links: dict with keys `path` (target file path) and
        `http` (download URL).
    """
    path = os.path.join(os.getcwd(), weights_links['path'])
    if not os.path.exists(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))
    get_file(path, weights_links['http'], cache_subdir='models',
             md5_hash='b7a93b2f9156ccbebe3ca24b41fc5402')
def download_file(raw_paths: Union[List[str], Tuple[str]],
                  urls: Union[List[str], Tuple[str]]):
    last_except = None
    for filename, url in zip(raw_paths, urls):
        try:
            get_file(filename, origin=url)
        except Exception as e:
            last_except = e
    if last_except is not None:
        raise last_except
def HourglassNetwork(heads, num_stacks=2, cnv_dim=256, inres=(512, 512),
                     weights='ctdet_coco',
                     dims=[256, 384, 384, 384, 512]):
    if not (weights in {'ctdet_coco', 'hpdet_coco', None} or
            os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `ctdet_coco` '
                         '(pre-trained on COCO), `hpdet_coco` (pre-trained on COCO) '
                         'or the path to the weights file to be loaded.')

    # Note: the reference input reads inres[2] and inres[3], so `inres`
    # must carry four entries even though the default is a 2-tuple.
    input_ref = Input(shape=(inres[2], inres[3], 2), name='HGRef')
    input_layer = Input(shape=(inres[0], inres[1], 3), name='HGInput')
    inter = pre(input_layer, cnv_dim)
    prev_inter = None
    outputs = []
    for i in range(num_stacks):
        prev_inter = inter
        _heads, inter = hourglass_module(heads, inter, cnv_dim, i, dims,
                                         input_ref)
        if i == num_stacks - 1:
            outputs.extend(_heads)
        if i < num_stacks - 1:
            inter_ = Conv2D(cnv_dim, 1, use_bias=False,
                            name='inter_.%d.0' % i)(prev_inter)
            inter_ = BatchNormalization(
                epsilon=1e-5, name='inter_.%d.bn1' % i)(inter_,
                                                        training=bn_train)

            cnv_ = Conv2D(cnv_dim, 1, use_bias=False,
                          name='cnv_.%d.0' % i)(inter)
            cnv_ = BatchNormalization(
                epsilon=1e-5, name='cnv_.%d.bn1' % i)(cnv_,
                                                      training=bn_train)

            inter = Add(name='inters.%d.inters.add' % i)([inter_, cnv_])
            inter = Activation('relu', name='inters.%d.inters.relu' % i)(inter)
            inter = residual(inter, cnv_dim, 'inters.%d' % i)

    model = Model(inputs=[input_layer, input_ref], outputs=outputs)

    # Use pretrained weights when training.
    if weights == 'ctdet_coco':
        print('loading ctdet coco')
        weights_path = get_file(
            '%s_hg.hdf5' % weights,
            CTDET_COCO_WEIGHTS_PATH,
            cache_subdir='models',
            file_hash='ce01e92f75b533e3ff8e396c76d55d97ff3ec27e99b1bdac1d7b0d6dcf5d90eb')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'hpdet_coco':
        weights_path = get_file(
            '%s_hg.hdf5' % weights,
            HPDET_COCO_WEIGHTS_PATH,
            cache_subdir='models',
            file_hash='5c562ee22dc383080629dae975f269d62de3a41da6fd0c821085fbee183d555d')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
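
# Hypothetical usage sketch. The head names and channel counts below are
# illustrative CenterNet-style heads, not taken from the source; note that
# `inres` needs four entries because the reference input reads inres[2:4].
model = HourglassNetwork(heads={'hm': 80, 'reg': 2, 'wh': 2},
                         num_stacks=2,
                         inres=(512, 512, 128, 128),
                         weights='ctdet_coco')
model.summary()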
def load_data(path="kmnist.npz", type="kmnist"): """Loads the KMNIST dataset. # Arguments path: path where to cache the dataset locally (relative to ~/.keras/datasets). type: any of kmnist, k49 # Returns Tuple of Numpy arrays: `(input_train, target_train), (input_test, target_test)`. """ # Log about loading logging.basicConfig(level=logging.INFO) logging.info('Loading dataset = kmnist') # Load training images path_train = get_file( f"{path}_{type}_train_imgs", origin=(f"http://codh.rois.ac.jp/kmnist/dataset/{type}/" f"{type}-train-imgs.npz") ) input_train = np.load(path_train)["arr_0"] # Load training labels path_train_labels = get_file( f"{path}_{type}_train_labels", origin=(f"http://codh.rois.ac.jp/kmnist/dataset/{type}/" f"{type}-train-labels.npz") ) target_train = np.load(path_train_labels)["arr_0"] # Load testing images path_test = get_file( f"{path}_{type}_test_imgs", origin=(f"http://codh.rois.ac.jp/kmnist/dataset/{type}/" f"{type}-test-imgs.npz") ) input_test = np.load(path_test)["arr_0"] # Load testing labels path_test_labels = get_file( f"{path}_{type}_test_labels", origin=(f"http://codh.rois.ac.jp/kmnist/dataset/{type}/" f"{type}-test-labels.npz") ) target_test = np.load(path_test_labels)["arr_0"] # Warn about citation warn_citation() # Return data return (input_train, target_train), (input_test, target_test)
def load_data():
    # load mnist data
    (source_data, _), (test_source_data, _) = mnist.load_data()

    # pad the 28x28 MNIST images with zeros to become 32x32,
    # since SVHN images are 32x32
    source_data = np.pad(source_data,
                         ((0, 0), (2, 2), (2, 2)),
                         'constant',
                         constant_values=0)
    test_source_data = np.pad(test_source_data,
                              ((0, 0), (2, 2), (2, 2)),
                              'constant',
                              constant_values=0)

    # input image dimensions
    # we assume data format "channels_last"
    rows = source_data.shape[1]
    cols = source_data.shape[2]
    channels = 1

    # reshape images to row x col x channels
    # for CNN output/validation
    size = source_data.shape[0]
    source_data = source_data.reshape(size, rows, cols, channels)
    size = test_source_data.shape[0]
    test_source_data = test_source_data.reshape(size, rows, cols, channels)

    # load SVHN data
    datadir = get_datadir()
    get_file('train_32x32.mat',
             origin='http://ufldl.stanford.edu/housenumbers/train_32x32.mat')
    get_file('test_32x32.mat',
             'http://ufldl.stanford.edu/housenumbers/test_32x32.mat')
    path = os.path.join(datadir, 'train_32x32.mat')
    target_data = loadmat(path)
    path = os.path.join(datadir, 'test_32x32.mat')
    test_target_data = loadmat(path)

    # source data, target data, test_source data
    data = (source_data, target_data, test_source_data, test_target_data)
    filenames = ('mnist_test_source.png', 'svhn_test_target.png')
    titles = ('MNIST test source images', 'SVHN test target images')

    return other_utils.load_data(data, titles, filenames)
def asegurar_dataset(self):
    """Download the dataset if it is not available locally."""
    if not self._ruta_dataset.exists():
        self._logger.info("Downloading the dataset from the repository")
        self._ruta_dataset.mkdir(parents=True)
        get_file(origin=self._url_datasets,
                 fname=self._archivo_dataset,
                 extract=True,
                 cache_dir=self._ruta_raiz_dataset,
                 cache_subdir="./")

    assert self._ruta_dataset_entreno_pintor.exists(), "Painter training directory does not exist"
    assert self._ruta_dataset_entreno_foto.exists(), "Photo training directory does not exist"
    assert self._ruta_dataset_test_pintor.exists(), "Painter test directory does not exist"
    assert self._ruta_dataset_test_foto.exists(), "Photo test directory does not exist"
    assert self._ruta_archivo_muestra_pintor.exists(), "Painter sample image does not exist"
    assert self._ruta_archivo_muestra_foto.exists(), "Photo sample image does not exist"
def load_data(path="usps.bz2", path_testing="usps-testing.bz2"): """Loads the USPS Handwritten Digits Dataset. # Arguments path: path where to cache the USPS data locally (relative to ~/.keras/datasets). path_testing: path where to cache the USPS testing data locally (relative to ~/.keras/datasets). # Returns Tuple of Numpy arrays: `(input_train, target_train), (input_test, target_test)`. Input structure: 16x16 image with a digit Target structure: number in the 0.0 - 9.0 range """ # Log about loading logging.basicConfig(level=logging.INFO) logging.info('Loading dataset = usps') # Download data path = get_file(path, origin=("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/" "datasets/multiclass/usps.bz2")) path_testing = get_file( path_testing, origin=("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/" "datasets/multiclass/usps.t.bz2")) # Decompress data decompress_train = decompress(path) decompress_test = decompress(path_testing) # Load LIBSVM data into NumPy array (input_train, target_train) = load_to_numpy(decompress_train) (input_test, target_test) = load_to_numpy(decompress_test) # Reshape data input_train = input_train.reshape(input_train.shape[0], 16, 16) input_test = input_test.reshape(input_test.shape[0], 16, 16) # Correct targets (e.g. number 3 is now returned as 4.0) target_train = target_train - 1 target_test = target_test - 1 # Warn about citation warn_citation() # Return data return (input_train, target_train), (input_test, target_test)
def download_imagenet(self):
    """ Download pre-trained weights for the specified backbone name.

    This name is in the format mobilenet{rows}_{alpha}, where rows is the
    imagenet shape dimension and alpha controls the width of the network.
    For more info check the explanation in the keras mobilenet script itself.
    """
    alpha = float(self.backbone.split('_')[1])
    rows = int(self.backbone.split('_')[0].replace('mobilenet', ''))

    # load weights
    if keras.backend.image_data_format() == 'channels_first':
        raise ValueError('Weights for "channels_last" format '
                         'are not available.')

    if alpha == 1.0:
        alpha_text = '1_0'
    elif alpha == 0.75:
        alpha_text = '7_5'
    elif alpha == 0.50:
        alpha_text = '5_0'
    else:
        alpha_text = '2_5'

    model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
    weights_url = mobilenet.BASE_WEIGHT_PATH + model_name
    weights_path = get_file(model_name, weights_url, cache_subdir='models')

    return weights_path
def MobileNet(in_size, weights_url, alpha=1.0, depth_mul=1):
    """ MobileNet Encoder """

    # check data format
    assert (K.image_data_format() == 'channels_last'), \
        'Currently only `channels_last` mode is supported'

    # create encoder
    im_input = Input(shape=(in_size, in_size, 3))

    x = _conv_block(im_input, 32, alpha, strides=(2, 2))
    x = _depthwise_block(x, 64, alpha, depth_mul, block=1)
    f1 = x

    x = _depthwise_block(x, 128, alpha, depth_mul, strides=(2, 2), block=2)
    x = _depthwise_block(x, 128, alpha, depth_mul, block=3)
    f2 = x

    x = _depthwise_block(x, 256, alpha, depth_mul, strides=(2, 2), block=4)
    x = _depthwise_block(x, 256, alpha, depth_mul, block=5)
    f3 = x

    x = _depthwise_block(x, 512, alpha, depth_mul, strides=(2, 2), block=6)
    x = _depthwise_block(x, 512, alpha, depth_mul, block=7)
    x = _depthwise_block(x, 512, alpha, depth_mul, block=8)
    x = _depthwise_block(x, 512, alpha, depth_mul, block=9)
    x = _depthwise_block(x, 512, alpha, depth_mul, block=10)
    x = _depthwise_block(x, 512, alpha, depth_mul, block=11)
    f4 = x

    x = _depthwise_block(x, 1024, alpha, depth_mul, strides=(2, 2), block=12)
    x = _depthwise_block(x, 1024, alpha, depth_mul, block=13)
    f5 = x

    # load pretrained weights
    weights_path = get_file('mobilenet_imagenet_weights', weights_url)
    Model(im_input, x).load_weights(weights_path)

    return im_input, [f1, f2, f3, f4, f5]
def image_with_YCB_objects():
    URL = ('https://github.com/oarriaga/altamira-data/releases/download'
           '/v0.9.1/image_with_YCB_objects.jpg')
    filename = os.path.basename(URL)
    fullpath = get_file(filename, URL, cache_subdir='paz/tests')
    image = load_image(fullpath)
    return image
def load_model_weights(_weights_collection, model, dataset, classes,
                       include_top):
    weights = find_weights(_weights_collection, model.name, dataset,
                           include_top)

    if weights:
        weights = weights[0]

        if include_top and weights['classes'] != classes:
            raise ValueError('If using `weights` and `include_top`'
                             ' as true, `classes` should be {}'.format(
                                 weights['classes']))

        weights_path = get_file(weights['name'],
                                weights['url'],
                                cache_subdir='models',
                                md5_hash=weights['md5'])

        model.load_weights(weights_path)
    else:
        raise ValueError(
            'There are no weights for such a configuration: ' +
            'model = {}, dataset = {}, '.format(model.name, dataset) +
            'classes = {}, include_top = {}.'.format(classes, include_top))
def download_imagenet(args):
    if 'vgg' in args.backbone:
        BASE_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/'
    elif 'res' in args.backbone:
        BASE_WEIGHTS_PATH = 'https://github.com/keras-team/keras-applications/releases/download/resnet/'
    elif 'dense' in args.backbone:
        BASE_WEIGHTS_PATH = 'https://github.com/keras-team/keras-applications/releases/download/densenet/'
    elif 'efficient' in args.backbone:
        BASE_WEIGHTS_PATH = 'https://github.com/Callidior/keras-applications/releases/download/efficientnet/'
    else:
        raise ValueError("Backbone '{}' not recognized.".format(args.backbone))

    if 'efficient' in args.backbone:
        file_name = '{}_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'.format(
            args.backbone.replace('b', '-b'))
    else:
        file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(
            args.backbone)

    return get_file(
        file_name,
        BASE_WEIGHTS_PATH + file_name,
        cache_subdir='models',
        file_hash=WEIGHTS_HASHES[args.backbone]
    )
def init():
    "Load all necessary artifacts to make predictions."
    title_pp_url = "https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/title_pp.dpkl"
    body_pp_url = 'https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/body_pp.dpkl'
    model_url = 'https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/Issue_Label_v1_best_model.hdf5'
    model_filename = 'downloaded_model.hdf5'

    # save keyfile
    pem_string = os.getenv('PRIVATE_KEY')
    if not pem_string:
        raise ValueError('Environment variable PRIVATE_KEY was not supplied.')
    with open('private-key.pem', 'wb') as f:
        f.write(str.encode(pem_string))

    with urlopen(title_pp_url) as f:
        title_pp = dpickle.load(f)

    with urlopen(body_pp_url) as f:
        body_pp = dpickle.load(f)

    model_path = get_file(fname=model_filename, origin=model_url)
    model = load_model(model_path)
    app.graph = tf.get_default_graph()
    app.issue_labeler = IssueLabeler(body_text_preprocessor=body_pp,
                                     title_text_preprocessor=title_pp,
                                     model=model)
def fetch_and_read_video(video_url, max_frames=32, resize=(224, 224)):
    # Derive the file extension from the URL so the cached copy keeps it.
    extension = video_url.rsplit('.', maxsplit=1)[-1]
    path = get_file(f'{str(uuid.uuid4())}.{extension}',
                    video_url,
                    cache_dir='.',
                    cache_subdir='.')
    capture = cv2.VideoCapture(path)

    frames = []
    while len(frames) <= max_frames:
        frame_read, frame = capture.read()
        if not frame_read:
            break

        frame = crop_center(frame)
        frame = cv2.resize(frame, resize)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)

    capture.release()

    frames = np.array(frames)
    # Pad short clips by repeating frames until max_frames is reached.
    if len(frames) < max_frames:
        repetitions = math.ceil(float(max_frames) / len(frames))
        repetitions = int(repetitions)
        frames = frames.repeat(repetitions, axis=0)

    frames = frames[:max_frames]
    return frames / 255.0
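
# Usage sketch with a placeholder URL. For any readable clip the function
# returns exactly `max_frames` RGB frames scaled to [0, 1], padding short
# clips by repetition, so the shape check below always holds.
video = fetch_and_read_video('https://example.com/sample_video.mp4')
print(video.shape)  # (32, 224, 224, 3)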
def get_model_VGGface():
    tf.keras.backend.set_image_data_format('channels_last')

    weights_path = VGGFACE_DIR + VGGFACE_SAVE_FILENAME
    if not os.path.exists(weights_path):
        # download the weight file
        weights_path = get_file(VGGFACE_SAVE_FILENAME,
                                VGGFACE_VGG16_WEIGHTS_LOAD_PATH,
                                cache_subdir=VGGFACE_DIR)

    # build model
    base_model = VGG16(weights=None, include_top=True)
    x = base_model.layers[-2].output
    predictions = Dense(2622, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # check data format
    if model.layers[1].data_format == "channels_first":
        warn_channel_first()

    # load weights
    model.load_weights(weights_path)
    tf.keras.backend.set_image_data_format('channels_first')
    return model
def _load_data(self, path, mode, test_size=0.2, seed=0):
    """Loads dataset.

    Args:
        path (str): dataset file name, optionally prefixed with
            subdirectories under ~/.keras/datasets
        mode: mode passed through to `get_data`
        test_size (float): fraction of data to reserve as test data
        seed (int): the seed for randomly shuffling the dataset

    Returns:
        tuple: (x_train, y_train), (x_test, y_test).
    """
    basepath = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    prefix = path.split(os.path.sep)[:-1]
    data_dir = os.path.join(basepath, *prefix) if prefix else basepath
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    elif not os.path.isdir(data_dir):
        raise IOError('{} exists but is not a directory'.format(data_dir))

    path = get_file(path, origin=self.url, file_hash=self.file_hash)
    train_dict, test_dict = get_data(path, mode=mode, test_size=test_size,
                                     seed=seed)
    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
def download(name, url, path):
    fname = str(Path(path).resolve() / name)
    try:
        file = kutils.get_file(fname, url)
        return file
    except Exception:
        print('Unable to get file {}'.format(name))
def read_data(finish_fast=False):
    # Get the file
    try:
        path = get_file(
            "babi-tasks-v1-2.tar.gz",
            origin="https://s3.amazonaws.com/text-datasets/"
            "babi_tasks_1-20_v1-2.tar.gz")
    except Exception:
        print(
            "Error downloading dataset, please download it manually:\n"
            "$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2"  # noqa: E501
            ".tar.gz\n"
            "$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz"  # noqa: E501
        )
        raise

    # Choose challenge
    challenges = {
        # QA1 with 10,000 samples
        "single_supporting_fact_10k": "tasks_1-20_v1-2/en-10k/qa1_"
        "single-supporting-fact_{}.txt",
        # QA2 with 10,000 samples
        "two_supporting_facts_10k": "tasks_1-20_v1-2/en-10k/qa2_"
        "two-supporting-facts_{}.txt",
    }
    challenge_type = "single_supporting_fact_10k"
    challenge = challenges[challenge_type]

    with tarfile.open(path) as tar:
        train_stories = get_stories(tar.extractfile(challenge.format("train")))
        test_stories = get_stories(tar.extractfile(challenge.format("test")))

    if finish_fast:
        train_stories = train_stories[:64]
        test_stories = test_stories[:64]
    return train_stories, test_stories
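
# Usage sketch: finish_fast trims both splits to 64 stories, which keeps
# smoke tests quick while still exercising the full download/parse path.
train_stories, test_stories = read_data(finish_fast=True)
print(len(train_stories), len(test_stories))  # 64 64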
def _setup_seqvec(self):
    """
    Load SeqVec model to either GPU or CPU; if weights are not cached,
    download them from a mirror.
    :return: instance of deepcoil.utils.seqvec.SeqVec
    """
    seqvec_dir = f'{self._path}/weights/seqvec'
    seqvec_conf_fn = f'{seqvec_dir}/uniref50_v2/options.json'
    seqvec_weights_fn = f'{seqvec_dir}/uniref50_v2/weights.hdf5'
    if not (os.path.isfile(seqvec_conf_fn)
            and os.path.isfile(seqvec_weights_fn)):
        print(
            'SeqVec weights are not available, downloading from the remote source (this\'ll happen only once)...'
        )
        urls = [
            'https://rostlab.org/~deepppi/seqvec.zip',
            'https://lbs.cent.uw.edu.pl/static/files/seqvec.zip'
        ]
        # Try each mirror in turn until one download succeeds.
        for url in urls:
            try:
                seqvec_zip_fn = get_file(f'{self._path}/weights/seqvec.zip',
                                         url)
                archive = ZipFile(seqvec_zip_fn)
                archive.extract('uniref50_v2/options.json', seqvec_dir)
                archive.extract('uniref50_v2/weights.hdf5', seqvec_dir)
                break
            except Exception:
                print(f'Could not download SeqVec weights from url: {url}')
    if self.use_gpu:
        return SeqVec(model_dir=f'{seqvec_dir}/uniref50_v2',
                      cuda_device=0,
                      tokens_per_batch=8000)
    else:
        return SeqVec(model_dir=f'{seqvec_dir}/uniref50_v2',
                      cuda_device=-1,
                      tokens_per_batch=8000)
def __init__(self,
             use_pretrained_weights=True,
             model_image_shape=(128, 128, 1)):
    model = PanopticNet('resnet50',
                        input_shape=model_image_shape,
                        norm_method='whole_image',
                        num_semantic_heads=2,
                        num_semantic_classes=[1, 1],
                        location=True,
                        include_top=True,
                        lite=True,
                        use_imagenet=use_pretrained_weights,
                        interpolation='bilinear')

    if use_pretrained_weights:
        weights_path = get_file(
            os.path.basename(WEIGHTS_PATH),
            WEIGHTS_PATH,
            cache_subdir='models',
            file_hash='104a7d7884c80c37d2bce6d1c3a17c7a')
        model.load_weights(weights_path, by_name=True)
    else:
        weights_path = None

    super(CytoplasmSegmentation, self).__init__(
        model,
        model_image_shape=model_image_shape,
        model_mpp=0.65,
        preprocessing_fn=phase_preprocess,
        postprocessing_fn=deep_watershed,
        dataset_metadata=self.dataset_metadata,
        model_metadata=self.model_metadata)
def load_data(cls,
              subset_name: str = 'train',
              shuffle: bool = True) -> Tuple[List[List[str]], List[List[str]]]:
    """
    Load dataset as sequence labeling format, char level tokenized

    Args:
        subset_name: {train, test, valid}
        shuffle: should shuffle or not, default True.

    Returns:
        dataset_features and dataset labels
    """
    corpus_path = get_file(cls.__corpus_name__,
                           cls.__zip_file__name,
                           cache_dir=K.DATA_PATH,
                           untar=True)

    if subset_name == 'train':
        file_path = os.path.join(corpus_path, 'example.train')
    elif subset_name == 'test':
        file_path = os.path.join(corpus_path, 'example.test')
    else:
        file_path = os.path.join(corpus_path, 'example.dev')

    x_data, y_data = DataReader.read_conll_format_file(file_path)
    if shuffle:
        x_data, y_data = utils.unison_shuffled_copies(x_data, y_data)
    logger.debug(f"loaded {len(x_data)} samples from {file_path}. Sample:\n"
                 f"x[0]: {x_data[0]}\n"
                 f"y[0]: {y_data[0]}")
    return x_data, y_data
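
# Hypothetical usage sketch; `SomeCorpus` stands in for whichever class
# owns the classmethod above and is not a name from the source. Note that
# any subset_name other than 'train' or 'test' maps to the example.dev split.
x_valid, y_valid = SomeCorpus.load_data('valid', shuffle=False)
print(len(x_valid))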
def decode_imagenet_predictions(preds, top=5):
    global CLASS_INDEX
    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError('`decode_predictions` expects '
                         'a batch of predictions '
                         '(i.e. a 2D array of shape (samples, 1000)). '
                         'Found array with shape: ' + str(preds.shape))
    if CLASS_INDEX is None:
        fpath = get_file(
            'imagenet_class_index.json',
            CLASS_INDEX_PATH,
            cache_subdir='models')
        CLASS_INDEX = json.load(open(fpath))
    results = []
    for pred in preds:
        top_indices = pred.argsort()[-top:][::-1]
        result = [
            tuple(CLASS_INDEX[str(i)]) + (pred[i], ) for i in top_indices
        ]
        results.append(result)
    return results
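
# Self-contained sketch: random scores over the 1000 ImageNet classes
# satisfy the shape check above; each decoded entry is a
# (class_id, class_name, score) tuple from the downloaded class index.
import numpy as np

dummy = np.random.rand(1, 1000)
for class_id, class_name, score in decode_imagenet_predictions(dummy, top=3)[0]:
    print(class_id, class_name, score)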