def downloadImages():
    """Download and extract the TensorFlow flower-photos dataset, then
    discover its class folders.

    Returns
    -------
    tuple[list[str], str]
        (sorted class names, path of the extracted dataset directory).
    """
    filename = 'flower_photos.tgz'
    dir_path = filename.split('.')[0]
    # Download only if neither the archive nor the extracted folder exists.
    if not (os.path.exists(filename) or os.path.exists(dir_path)):
        # BUG FIX: the original URL literal had a leading space, which can
        # make urlretrieve fail with an unknown-url-type error.
        url = ('https://storage.googleapis.com/download.tensorflow.org/'
               'example_images/flower_photos.tgz')
        urllib.request.urlretrieve(url, filename)
    if not os.path.exists(dir_path):
        # Context manager guarantees the archive handle is closed
        # (the original leaked the TarFile object).
        with tarfile.open(filename) as archive:
            archive.extractall()
    # Collect the names of our classes (one sub-directory per class).
    classes = []
    if os.path.exists(dir_path):
        for class_name in os.listdir(dir_path):
            class_path = os.path.join(dir_path, class_name)
            # Only directories are classes; skip stray files (e.g. LICENSE).
            if os.path.isdir(class_path):
                n_images = len(os.listdir(class_path))
                print("Found {} images of {}".format(n_images, class_name))
                classes.append(class_name)
    # Sort the list in alphabetical order and print it.
    classes.sort()
    print(classes)
    return classes, dir_path
def untar_data(path):
    """Extract every ``.zip``, ``.gz`` and ``.tar`` archive found directly
    inside *path*, then list the directory contents.

    Parameters
    ----------
    path : str
        Directory to scan (non-recursive) for archives.
    """
    for entry in os.scandir(path):
        # BUG FIX: build the real path once with os.path.join. The original
        # used string concatenation (``path + file.name``) for os.stat but
        # opened the bare ``file.name``, which only worked when *path* was
        # the current working directory with a trailing separator.
        full_path = os.path.join(path, entry.name)
        if entry.name.endswith('.zip'):
            print(os.stat(full_path).st_size, entry.name, "this is zip file")
            with zipfile.ZipFile(full_path, 'r') as zf:
                zf.extractall(path)
        elif entry.name.endswith('.gz'):
            # TODO: special handling still needed for .bin payloads.
            print(os.stat(full_path).st_size, entry.name, "this is gz file")
            # BUG FIX: extract into *path* like the zip branch did; the
            # original extracted tar/gz archives into the CWD instead.
            with tarfile.open(full_path, "r") as archive:
                archive.extractall(path)
        elif entry.name.endswith('.tar'):
            print(os.stat(full_path).st_size, entry.name, "this is tar file")
            with tarfile.open(full_path, "r") as archive:
                archive.extractall(path)
    # Quick sanity check: show what the directory contains after extraction.
    for entry in os.scandir(path):
        print(entry.name)
def download_and_extract(data_dir, download_dir):
    """Download each archive listed in the module-level ``urls`` iterable
    into *download_dir* (skipping ones already present) and extract it into
    *data_dir*.

    Parameters
    ----------
    data_dir : str
        Directory the archive contents are extracted into.
    download_dir : str
        Directory the raw archives are saved to.
    """
    # NOTE(review): relies on a module-level ``urls`` iterable defined
    # elsewhere in this file — confirm it exists at the call site.
    for url in urls:
        target_file = url.split('/')[-1]
        if target_file not in os.listdir(download_dir):
            print('Downloading', url)
            archive_path = os.path.join(download_dir, target_file)
            urllib.request.urlretrieve(url, archive_path)
            # BUG FIX: open the archive where it was actually saved; the
            # original opened the bare filename relative to the CWD, which
            # fails whenever CWD != download_dir. Also close the handle.
            with tarfile.open(archive_path) as archive:
                archive.extractall(data_dir)
        else:
            print('Already downloaded', url)
def class_20_to_10(y_aux):
    """Map CIFAR-100 coarse 20-superclass labels to a custom 10-class scheme.

    On first use (when the ``cifar-100-python`` directory is absent) the
    CIFAR-100 archive is downloaded and unpacked, then deleted.

    Parameters
    ----------
    y_aux : np.ndarray
        2-D label array (labels in column 0, values in [0, 19]).
        # assumes shape (N, 1) as produced by keras-style label loaders —
        # TODO confirm at the call site.

    Returns
    -------
    np.ndarray
        Array of the same shape/dtype as ``y_aux`` with labels in [0, 9].
    """
    if not os.path.exists('cifar-100-python'):
        url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
        wget.download(url)
        # Close the archive deterministically (original leaked the handle).
        with tarfile.open("cifar-100-python.tar.gz") as archive:
            archive.extractall()
        os.remove("cifar-100-python.tar.gz")
    # Removed: a local ``unpickle`` helper whose result (``dict_try``) was
    # never used — dead code that shadowed the ``dict`` builtin and forced
    # a pointless read of 'cifar-100-python/test'.
    # Lookup table: index = 20-superclass id, value = 10-class id.
    class_10 = np.array(
        [1, 2, 5, 6, 5, 6, 6, 3, 0, 7, 8, 0, 1, 3, 4, 0, 2, 5, 9, 9])
    # Vectorized equivalent of the original per-row loop (each row of the
    # output was filled with the mapped value of that row's column-0 label).
    y_pri = np.zeros_like(y_aux)
    y_pri[:] = class_10[y_aux[:, 0]].reshape(-1, 1)
    return y_pri
def save_response_content(response, destination):
    """Stream *response* to *destination*, extract it as a tar archive into
    the CWD, then delete the archive file.

    Parameters
    ----------
    response : object
        A requests-style response exposing ``iter_content(chunk_size)``.
    destination : str
        Path the archive is written to (and removed from after extraction).
    """
    import os
    import tarfile

    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        print("WRITING")
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    print("UNZIPPING...")
    # BUG FIX: context manager closes the TarFile even if extraction fails;
    # the original left the handle open.
    with tarfile.open(destination) as archive:
        archive.list()
        archive.extractall()
    print("UNZIPPED...")
    print("DELETING TAR FILE...")
    os.remove(destination)
    print("DELETED TAR FILE...")
def maybe_download(filename, work_directory, source_url):
    """Download ``filename`` from ``source_url`` into ``work_directory``
    and extract it as a gzipped tar archive.

    If ``work_directory`` already exists the whole step is skipped and
    ``None`` is returned. NOTE(review): this means repeat calls return
    ``None`` instead of the path — confirm callers tolerate that.

    :param filename: name to store the downloaded archive under.
    :param work_directory: directory created for download/extraction.
    :param source_url: URL the archive is fetched from.
    :return: path of the (now removed) archive file, or None if skipped.
    """
    # An existing work directory is treated as "already downloaded".
    if gfile.Exists(work_directory):
        return
    gfile.MakeDirs(work_directory)
    filepath = os.path.join(work_directory, filename)
    if not gfile.Exists(filepath):
        print('Downloading', filename)
        # urlretrieve with no destination writes to a temporary file,
        # which is then copied into place via gfile.
        temp_file_name, _ = urllib.request.urlretrieve(source_url)
        gfile.Copy(temp_file_name, filepath)
        with gfile.GFile(filepath) as f:
            size = f.size()
        print('Successfully downloaded', filename, size, 'bytes.')
    print("Extracting: " + filepath)
    base_path = os.path.dirname(filepath)
    # Archive is assumed to be .tar.gz ("r:gz"); the file itself is
    # deleted after extraction.
    with tarfile.open(filepath, "r:gz") as tf:
        tf.extractall(base_path)
    os.remove(filepath)
    return filepath
import cv2 # scikit-image package import skimage import numpy as np import random from sklearn.model_selection import train_test_split tf.enable_eager_execution() tf.set_random_seed(0) np.random.seed(0) # unzip tar file import tarfile tf = tarfile.open("drive/My Drive/Colab Notebooks/notMNIST_large.tar") tf.extractall() tf.close() train_dir = "notMNIST_large/" imgs = [] labels = [] imageSize = 28 # load data and labels def get_data(folder): imgs = [] labels = [] for folderName in os.listdir(folder):
from keras import Input, Model
from keras.applications import InceptionResNetV2
from keras.callbacks import TensorBoard
from keras.layers import Conv2D, Flatten, Dense, BatchNormalization, Reshape, concatenate, LeakyReLU, Lambda, Activation, UpSampling2D, Dropout
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras_preprocessing import image
from scipy.io import loadmat

# In[]: Number 1 — mount Google Drive and unpack the wiki_crop archive.
from google.colab import drive
drive.mount('/content/drive')

import tarfile
# NOTE(review): the TarFile handle is never closed, and ``tf`` would shadow
# tensorflow if it is imported elsewhere in this file — confirm.
tf = tarfile.open("/content/drive/My Drive/Chapter 9 AI/wiki_crop.tar")
tf.extractall(path="/content/drive/My Drive/Chapter 9 AI")


# In[]: Number 2
def load_data(wiki_dir, dataset='wiki'):
    """Load metadata arrays from the IMDB-WIKI ``<dataset>.mat`` file.

    NOTE(review): this definition is truncated in this excerpt.
    """
    # Load the wiki.mat file
    meta = loadmat(os.path.join(wiki_dir, "{}.mat".format(dataset)))
    # Load the list of all files
    full_path = meta[dataset][0, 0]["full_path"][0]
    # List of Matlab serial date numbers
    dob = meta[dataset][0, 0]["dob"][0]
    # List of years when photo was taken
    photo_taken = meta[dataset][0, 0]["photo_taken"][0]  # year
def maybe_download_and_extract(url_source,
                               download_directory,
                               filename=None,
                               extract=True,
                               expected_bytes=None):
    """Check if the file exists in ``download_directory``, otherwise try to
    download and extract it.

    Parameters
    ----------
    url_source : str
        The URL to download the file from.
    download_directory : str
        A folder path to search for the file in and download the file to.
    filename : str
        The name of the (to be) downloaded file.
    extract : boolean
        If True, tries to uncompress the downloaded file, default is True.
        Supported formats are ".tar.gz/.tar.bz2" or ".zip" files.
        If different format, extraction is skipped.
    expected_bytes : int or None
        If set, verifies that the downloaded file is of the specified size,
        otherwise raises an Exception; defaults to None (no check).

    Returns
    -------
    str
        File path of the downloaded (uncompressed) file.

    Examples
    --------
    >>> res = maybe_download_and_extract(
    ...     url_source='http://yann.lecun.com/exdb/mnist/',
    ...     download_directory='data/')
    """
    # Create the download directory if it does not already exist.
    if not os.path.exists(download_directory):
        os.makedirs(download_directory)

    if filename is None:
        # Derive the filename (without extension) from the URL.
        filename_with_extension = url_source.split('/')[-1]
        filename = filename_with_extension.split('.')[0]
    else:
        filename_with_extension = filename

    filepath = os.path.join(download_directory, filename)

    # Download only when the extension-less target is not already present.
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            percentage = float(count * block_size) / float(total_size) * 100.0
            sys.stdout.write(f'\r>> Downloading (unknown) {percentage:.1f}%')
            sys.stdout.flush()

        # Keep the proper extension on disk so archive sniffers work.
        filepath_with_extension = os.path.join(download_directory,
                                               filename_with_extension)
        urllib.request.urlretrieve(url_source, filepath_with_extension,
                                   _progress)
        statinfo = os.stat(filepath_with_extension)
        # BUG FIX: "Succesfully" typo corrected in the user-facing message.
        print(f'Successfully downloaded (unknown) {statinfo.st_size} bytes.')
        if expected_bytes is not None and expected_bytes != statinfo.st_size:
            # The f-prefix was dropped: the message has no placeholders.
            raise Exception(
                'Failed to verify (unknown). Can you get to it with a browser?')

        if extract:
            try:
                print(f'Trying to extract archive {filepath_with_extension}')
                if tarfile.is_tarfile(filepath_with_extension):
                    with tarfile.open(filepath_with_extension, 'r') as archive:
                        # Archives that ship their own top-level directory go
                        # straight into download_directory; otherwise extract
                        # into a directory named after the file.
                        if archive_has_toplevel_dir(archive.getnames()):
                            archive.extractall(download_directory)
                        else:
                            archive.extractall(
                                os.path.join(download_directory, filename))
                    # Remove the downloaded file since we extracted it.
                    os.remove(filepath_with_extension)
                elif zipfile.is_zipfile(filepath_with_extension):
                    with zipfile.ZipFile(filepath_with_extension) as zf:
                        if archive_has_toplevel_dir(zf.namelist()):
                            zf.extractall(download_directory)
                        else:
                            zf.extractall(
                                os.path.join(download_directory, filename))
                    os.remove(filepath_with_extension)
                else:
                    print('Skipping file extraction as format not supported')
                    # Not extracted, so return the path with its extension.
                    filepath = filepath_with_extension
            except Exception:
                # Remove the downloaded file, then re-raise with the original
                # traceback (the original used ``raise (e)``).
                os.remove(filepath_with_extension)
                raise
    return filepath