def download_raw_dataset(self):
    """
    Fetch the raw Kaggle archive, unpack it and move it to the cache location.

    The Kaggle client is created and authenticated while the environment is
    temporarily updated with ``self.kaggle_username`` / ``self.kaggle_key``
    (via ``self.update_env``), so credentials passed in by the user are
    picked up when none are configured otherwise.

    The files are downloaded into ``self.raw_temp_path``, the archive named
    ``self.archive_filename`` is extracted in place, and finally the
    temporary directory is renamed to ``self.raw_dataset_path``.
    """
    credentials = dict(KAGGLE_USERNAME=self.kaggle_username,
                       KAGGLE_KEY=self.kaggle_key)
    with self.update_env(**credentials):
        # authenticate() is called explicitly so that freshly supplied
        # credentials are picked up if necessary.
        client = create_kaggle_client()
        client.authenticate()

    os.makedirs(self.raw_temp_path, exist_ok=True)

    # Competitions and plain datasets use different download endpoints;
    # both fetch every file of the competition/dataset.
    fetch_all = (client.competition_download_files
                 if self.is_kaggle_competition
                 else client.dataset_download_files)
    fetch_all(self.competition_name, path=self.raw_temp_path)

    zip_location = os.path.join(self.raw_temp_path, self.archive_filename)
    with ZipFile(zip_location, 'r') as archive:
        archive.extractall(self.raw_temp_path)

    # Publish the fully extracted directory under its final name.
    os.rename(self.raw_temp_path, self.raw_dataset_path)
def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Prompt for Kaggle credentials, then download and unzip a dataset.

    Args:
        dataset_url: Value handed to ``get_kaggle_dataset_id`` to obtain the
            dataset id.
        data_dir: Parent directory; files go into a sub-directory named after
            the second ``/``-separated component of the dataset id.
        force: Forwarded to ``api.dataset_download_files``.
        dry_run: When true, nothing is downloaded (credentials are still
            requested and the dataset id is still resolved).
    """
    print("Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds")
    os.environ['KAGGLE_USERNAME'] = click.prompt("Your Kaggle username")
    os.environ['KAGGLE_KEY'] = _get_kaggle_key()
    dataset_id = get_kaggle_dataset_id(dataset_url)

    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after the credentials were placed in the environment.
    from kaggle import api
    api.authenticate()

    destination = os.path.join(data_dir, dataset_id.split('/')[1])
    api.dataset_download_files(dataset_id,
                               destination,
                               force=force,
                               quiet=False,
                               unzip=True)
def download_coronavirus_data(path=r'New\ Data/', verbose=False):
    """Download the novel-corona-virus-2019 Kaggle dataset and locate its main csv.

    The ``kaggle`` package is imported, and installed with pip first when the
    import fails. The dataset is then downloaded and unzipped into ``path``.

    Args:
        path(str): Folder to extract the dataset into. It is created when
            missing. A trailing '/' is accepted but not required.
        verbose(bool): Print progress details when true.

    Returns:
        str: Path of the first downloaded file whose name contains
            'covid_19_data.csv'.

    Raises:
        IndexError: If no such file is present in ``path`` after the download.
    """
    import glob
    import importlib
    import os
    import subprocess
    import sys

    os.makedirs(path, exist_ok=True)

    try:
        import kaggle.api as kaggle
    except ImportError:
        # Install into the interpreter that is actually running this code; a
        # bare "pip" on PATH may belong to another environment. A failed
        # install is not raised here: it surfaces as ImportError just below.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
            check=False)
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        if verbose:
            print('\t- Installed kaggle command line tool.')
        import kaggle.api as kaggle

    kaggle.authenticate()
    kaggle.dataset_download_files(
        'sudalairajkumar/novel-corona-virus-2019-dataset',
        path=path,
        force=True,
        unzip=True)

    print('[i] Extraction Complete.')

    # os.path.join yields the same pattern as before when path ends with '/',
    # and a correct one when it does not.
    file_list = glob.glob(os.path.join(path, "*.csv"))

    # Keep only the main data file.
    main_file = [file for file in file_list if 'covid_19_data.csv' in file]
    if verbose:
        print(f"[i] The main file name is {main_file}")
    return main_file[0]
def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Download a Kaggle dataset or competition into ``data_dir``.

    The dataset id is obtained from ``get_kaggle_dataset_id(dataset_url)``;
    its second ``/``-separated component names the target sub-directory.

    Args:
        dataset_url: Value handed to ``get_kaggle_dataset_id``.
        data_dir: Parent directory of the download target.
        force: Download even when the target directory already holds files;
            also forwarded to the Kaggle API call.
        dry_run: When true, skip the actual download. Credentials may still
            be requested.

    Returns:
        None.
    """
    dataset_id = get_kaggle_dataset_id(dataset_url)
    id_parts = dataset_id.split('/')
    kind, name = id_parts[0], id_parts[1]
    target_dir = os.path.join(data_dir, name)

    # Nothing to do when a previous download is already in place.
    if not force and os.path.exists(target_dir) and os.listdir(target_dir):
        print(
            'Skipping, found downloaded files in "{}" (use force=True to force download)'
            .format(target_dir))
        return

    # Ask interactively only when read_kaggle_creds() reports no credentials.
    if not read_kaggle_creds():
        print(
            "Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds"
        )
        os.environ['KAGGLE_USERNAME'] = click.prompt("Your Kaggle username")
        os.environ['KAGGLE_KEY'] = _get_kaggle_key()

    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after credentials may have been put in the environment.
    from kaggle import api
    api.authenticate()

    if kind in ('competitions', 'c'):
        # Competition downloads are not unzipped by the API call here, so the
        # archive is extracted explicitly and then removed.
        api.competition_download_files(name,
                                       target_dir,
                                       force=force,
                                       quiet=False)
        zip_fname = os.path.join(target_dir, name + '.zip')
        extract_archive(zip_fname, target_dir)
        try:
            os.remove(zip_fname)
        except OSError as e:
            # Best effort: a leftover archive is not fatal.
            print('Could not delete zip file, got ' + str(e))
    else:
        api.dataset_download_files(dataset_id,
                                   target_dir,
                                   force=force,
                                   quiet=False,
                                   unzip=True)
import kaggle.api
from kaggle.api.kaggle_api_extended import KaggleApi

# Create a Kaggle API client and authenticate it before any download call.
api = KaggleApi()
api.authenticate()

# Download all files of a dataset
# Signature: dataset_download_files(dataset, path=None, force=False, quiet=True, unzip=False)
# api.dataset_download_files('avenn98/world-of-warcraft-demographics')

# Download a single file
# Signature: dataset_download_file(dataset, file_name, path=None, force=False, quiet=True)

# Download every file of the sentiment-analysis dataset into the local folder.
# The dataset reference has the form '{owner}/{dataset-slug}'; a leading '/'
# would leave the owner part empty, so it must not be present.
api.dataset_download_files(
    'shashikant9198/nlp-and-glove-word-embeddings-sentimental-analysis',
    path='/Users/fred/OneDrive - Adobe/Data/NLP_sentiment/Kaggle_Files')
def get_kaggle(dsname, fpath):
    """Authenticate with Kaggle, then download and unzip ``dsname`` into ``temp/``.

    Args:
        dsname: Dataset reference passed to ``dataset_download_files``.
        fpath: Not used by this function.
            NOTE(review): the destination is always ``temp/`` regardless of
            this argument - confirm whether callers expect otherwise.
    """
    import kaggle.api as kaggle_client

    kaggle_client.authenticate()

    download_options = {'path': 'temp/', 'unzip': True}
    kaggle_client.dataset_download_files(dsname, **download_options)
def download_coronavirus_data(self, path=None, verbose=None):
    """Download the novel-corona-virus-2019 Kaggle dataset and extract it.

    The ``kaggle`` package is imported, and installed with pip first when the
    import fails. The dataset is then downloaded and unzipped into ``path``,
    after which ``self.get_data_fpath(path)`` is called.

    Args:
        path(str): Folder to extract the dataset into; created when missing.
            Defaults to ``self._data_folder`` when None.
        verbose(bool): Print progress details. Defaults to the instance's
            private ``__verbose`` attribute when None.

    Returns:
        None. The result of ``self.get_data_fpath(path)`` is not returned.
        NOTE(review): an earlier docstring promised a list of csv paths -
        confirm whether callers expect a return value.
    """
    import importlib
    import os
    import subprocess
    import sys

    if verbose is None:
        verbose = self.__verbose

    if verbose:
        print('[i] DOWNLOADING DATA USING KAGGLE API')
        print(
            "\thttps://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset"
        )

    if path is None:
        path = self._data_folder

    os.makedirs(path, exist_ok=True)

    try:
        import kaggle.api as kaggle
    except ImportError:
        # Install into the interpreter that is actually running this code; a
        # bare "pip" on PATH may belong to another environment. A failed
        # install is not raised here: it surfaces as ImportError just below.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
            check=False)
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        if verbose:
            print('\t- Installed kaggle command line tool.')
        import kaggle.api as kaggle

    kaggle.authenticate()
    kaggle.dataset_download_files(
        'sudalairajkumar/novel-corona-virus-2019-dataset',
        path=path,
        force=True,
        unzip=True)

    # Honour the resolved flag so an explicit verbose=False silences this too.
    if verbose:
        print(f'\t- Downloaded dataset .zip and extracted to:"{path}"')

    self.get_data_fpath(path)