""" Pulls data from Kaggle API """
from kaggle import KaggleApi

# Identifiers of the remote resources this script fetches, and where the
# kernel output is written.
NETFLIX_DATASET = "shivamb/Netflix-shows"
ROTTEN_TOMATOES_KERNEL = (
    "eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset"
)
KERNEL_OUTPUT_DIR = "./"

# One client is created and authenticated up front, then reused for both
# requests below.
api = KaggleApi()
api.authenticate()

# Request the dataset files with unzip enabled; no target path is given, so
# the client chooses the download location.
api.dataset_download_files(NETFLIX_DATASET, unzip=True)

# Request the kernel's output, passing the current directory as the target.
api.kernels_output(ROTTEN_TOMATOES_KERNEL, KERNEL_OUTPUT_DIR)
def kaggle_dataset_download(api, dataset_name, path):
    """Download a Kaggle dataset with unzip enabled and print a notice.

    The work is delegated to ``kag_api.dataset_download_files``. ``kag_api``
    is not defined in this function; it is looked up in the enclosing module.
    NOTE(review): presumably ``kag_api`` is the ``KaggleApi`` class, so that
    ``api`` acts as the instance in an unbound-method call -- confirm against
    the module's imports.

    Args:
        api: Forwarded as the first positional argument of the download call.
        dataset_name: Forwarded as the second positional argument
            (presumably the ``owner/dataset`` slug -- verify against caller).
        path: Forwarded as the ``path`` keyword argument of the download call.

    Returns:
        None. A confirmation line is printed once the download call returns.
    """
    download_files = kag_api.dataset_download_files
    download_files(api, dataset_name, path=path, unzip=True)
    print("[INFO] Dataset downloaded.")
def fetch_pins_people(resize=.5, min_faces_per_person=0, color=False,
                      slice_=(slice(25, 275), slice(25, 275)),
                      download_if_missing=True):
    """Load PINS dataset.

    Use a PINS dataset provided by Kaggle, leveraging the scikit-learn
    memory optimizations.

    The archive and the extracted image folder are looked up under
    ``<kaggle path>/datasets/<PINS_DATASET['name']>``, where ``<kaggle path>``
    is the ``'path'`` entry of the Kaggle configuration file.

    Args:
        resize (float, optional): Image resize factor. Defaults to .5.
        min_faces_per_person (int, optional): Minimal number of images per
            person. Defaults to 0.
        color (bool): Toggle if images should be in RGB or 1 channel.
            Defaults to False.
        slice_ (tuple, optional): A rectangle to which images are sliced.
            Defaults to (slice(25, 275), slice(25, 275)).
        download_if_missing (bool, optional): Set if the dataset should be
            downloaded if not present on the machine. Defaults to True.

    Returns:
        sklearn.utils.Bunch: Collection of data set with the fields
        ``data`` (images flattened to one row per image), ``images``,
        ``target`` and ``target_names``.

    Raises:
        FileNotFoundError: If the image folder has not been extracted yet and
            the ZIP archive is not present (for example when
            ``download_if_missing`` is False).
    """
    from kaggle import KaggleApi

    # Locate the archive and the extracted folder inside the Kaggle home.
    kaggle_api = KaggleApi()
    kaggle_home = kaggle_api.read_config_file()['path']
    dataset_dir = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'])
    path_to_zip = os.path.join(dataset_dir, PINS_DATASET['zip'])
    path_to_files = os.path.join(dataset_dir, PINS_DATASET['folder'])

    # Download if missing; authentication is only needed on this path.
    if download_if_missing and not os.path.exists(path_to_zip):
        kaggle_api.authenticate()
        kaggle_api.dataset_download_files(PINS_DATASET['name'], quiet=False)

    # Extract the ZIP dataset only once.
    if not os.path.exists(path_to_files):
        with ZipFile(path_to_zip, 'r') as archive:
            archive.extractall(dataset_dir)

    # Load data in memory, caching the loader's result on disk with joblib.
    m = Memory(location=kaggle_home, compress=6, verbose=0)
    load_func = m.cache(_fetch_lfw_people)
    faces, target, target_names = load_func(
        path_to_files, resize=resize,
        min_faces_per_person=min_faces_per_person,
        color=color, slice_=slice_)

    # One flat feature vector per image.
    X = faces.reshape(len(faces), -1)

    # Fix names: drop the 'pins ' / ' face' decorations and title-case the
    # rest. The public, vectorised ``np.char`` routines replace the previous
    # per-element loop over the deprecated private ``np.core`` path. The
    # result is written back in place so the array keeps its dtype; this is
    # safe because the cleaned strings are never longer than the originals.
    cleaned = np.char.replace(target_names, 'pins ', '')
    cleaned = np.char.replace(cleaned, ' face', '')
    target_names[...] = np.char.title(cleaned)

    # pack the results as a Bunch instance
    return Bunch(data=X, images=faces,
                 target=target, target_names=target_names)