Exemplo n.º 1
0
def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Ask for Kaggle credentials, then download and unzip a dataset.

    The username is prompted via ``click`` and the key is obtained from
    ``_get_kaggle_key()``; both are exported through the ``KAGGLE_USERNAME``
    and ``KAGGLE_KEY`` environment variables before the Kaggle client is used.

    Args:
        dataset_url: Value handed to ``get_kaggle_dataset_id`` to obtain the
            dataset id.
        data_dir: Parent directory; files are placed in a sub-folder named
            after the second '/'-separated component of the dataset id.
        force: Forwarded to the Kaggle client's ``force`` argument.
        dry_run: When true, nothing is downloaded (credentials are still
            requested).
    """
    print("Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds")
    username = click.prompt("Your Kaggle username")
    os.environ['KAGGLE_USERNAME'] = username
    os.environ['KAGGLE_KEY'] = _get_kaggle_key()

    dataset_id = get_kaggle_dataset_id(dataset_url)

    # Guard clause: a dry run stops here without touching the Kaggle client.
    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after the credentials are in the environment.
    from kaggle import api
    api.authenticate()
    destination = os.path.join(data_dir, dataset_id.split('/')[1])
    api.dataset_download_files(
        dataset_id,
        destination,
        force=force,
        quiet=False,
        unzip=True,
    )
def download_coronavirus_data(path='New\\ Data/', verbose=False):
    """Download the Kaggle novel-coronavirus dataset and return the main CSV.

    Creates ``path`` if needed, installs the ``kaggle`` package with pip when
    it cannot be imported, then downloads and unzips
    ``sudalairajkumar/novel-corona-virus-2019-dataset`` into ``path``
    (always re-downloading, since ``force=True``). Finally it searches the
    extracted ``*.csv`` files for ``covid_19_data.csv``.

    Args:
        path (str): Folder to extract the dataset into. A trailing '/' is
            accepted but not required. The default value is kept exactly as
            before (it contains a literal backslash before the space); it is
            only spelled with an explicit escape to avoid the
            invalid-escape-sequence warning.
        verbose (bool): Print extra progress messages.

    Returns:
        str: Path of the first extracted CSV whose path contains
        ``covid_19_data.csv``.

    Raises:
        IndexError: If no extracted CSV matches ``covid_19_data.csv``.
    """
    import glob
    import os

    os.makedirs(path, exist_ok=True)

    try:
        import kaggle.api as kaggle  # availability probe only
    except ImportError:
        import importlib
        import subprocess
        import sys

        # Install with the running interpreter's pip so the package ends up
        # in the environment this process imports from.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
            check=False)
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()

        # Clearing notebook output is cosmetic; skip it outside IPython.
        try:
            from IPython.display import clear_output
        except ImportError:
            pass
        else:
            clear_output()
        if verbose:
            print('\t- Installed kaggle command line tool.')

    ## Using the kaggle.api
    import kaggle.api as kaggle
    kaggle.authenticate()
    kaggle.dataset_download_files(
        'sudalairajkumar/novel-corona-virus-2019-dataset',
        path=path,
        force=True,
        unzip=True)

    ## Get list of all csvs
    print('[i] Extraction Complete.')
    file_list = glob.glob(os.path.join(path, "*.csv"))

    ## Find main df
    main_file = [file for file in file_list if 'covid_19_data.csv' in file]
    if verbose:
        print(f"[i] The main file name is {main_file}")
    if not main_file:
        # Same exception type as indexing the empty list, with a clear message.
        raise IndexError(
            f"covid_19_data.csv was not found among the CSV files in {path!r}")
    return main_file[0]
Exemplo n.º 3
0
def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Download a Kaggle dataset or competition into ``data_dir``.

    The download is skipped when the target folder already contains files and
    ``force`` is false. Credentials are requested interactively only when
    ``read_kaggle_creds()`` reports none.

    Args:
        dataset_url: Value handed to ``get_kaggle_dataset_id`` to obtain an
            id of the form ``<prefix>/<name>``.
        data_dir: Parent directory; files go into ``data_dir/<name>``.
        force: Download even if the target folder already has files; also
            forwarded to the Kaggle client.
        dry_run: When true, stop before contacting Kaggle.

    Returns:
        None.
    """
    dataset_id = get_kaggle_dataset_id(dataset_url)
    id_parts = dataset_id.split('/')
    slug = id_parts[1]
    target_dir = os.path.join(data_dir, slug)

    if not force and os.path.exists(target_dir) and os.listdir(target_dir):
        print(
            'Skipping, found downloaded files in "{}" (use force=True to force download)'
            .format(target_dir))
        return

    if not read_kaggle_creds():
        print(
            "Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds"
        )
        os.environ['KAGGLE_USERNAME'] = click.prompt("Your Kaggle username")
        os.environ['KAGGLE_KEY'] = _get_kaggle_key()

    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after any prompted credentials are in the environment.
    from kaggle import api
    api.authenticate()
    if id_parts[0] in ('competitions', 'c'):
        # Competition files are fetched as a zip named after the competition,
        # so they are unpacked here and the archive is removed afterwards.
        api.competition_download_files(slug,
                                       target_dir,
                                       force=force,
                                       quiet=False)
        zip_fname = os.path.join(target_dir, slug + '.zip')
        extract_archive(zip_fname, target_dir)
        try:
            os.remove(zip_fname)
        except OSError as e:
            # Best effort: a leftover archive is not fatal.
            print('Could not delete zip file, got ' + str(e))
    else:
        api.dataset_download_files(dataset_id,
                                   target_dir,
                                   force=force,
                                   quiet=False,
                                   unzip=True)
Exemplo n.º 4
0
import kaggle.api
from kaggle.api.kaggle_api_extended import KaggleApi

# Create a Kaggle API client and authenticate it before any download call.
api = KaggleApi()
api.authenticate()

# Download all files of a dataset
# Signature: dataset_download_files(dataset, path=None, force=False, quiet=True, unzip=False)
# api.dataset_download_files('avenn98/world-of-warcraft-demographics')

# Download a single file
# Signature: dataset_download_file(dataset, file_name, path=None, force=False, quiet=True)

# Dataset references use the form "owner/dataset-slug"; a leading '/' would
# leave the owner part empty, so it is omitted here.
api.dataset_download_files(
    'shashikant9198/nlp-and-glove-word-embeddings-sentimental-analysis',
    path='/Users/fred/OneDrive - Adobe/Data/NLP_sentiment/Kaggle_Files')
def get_kaggle(dsname, fpath):
    """Authenticate with Kaggle, then download and unzip ``dsname`` into ``temp/``.

    Args:
        dsname: Dataset reference forwarded to the Kaggle client's
            ``dataset_download_files``.
        fpath: Not used by this function.
    """
    import kaggle.api as kaggle_client

    download_dir = 'temp/'
    kaggle_client.authenticate()
    kaggle_client.dataset_download_files(
        dsname,
        path=download_dir,
        unzip=True,
    )
    def download_coronavirus_data(self, path=None, verbose=None):
        """Download the Kaggle novel-coronavirus dataset into ``path``.

        Creates ``path`` if needed, installs the ``kaggle`` package with pip
        when it cannot be imported, downloads and unzips
        ``sudalairajkumar/novel-corona-virus-2019-dataset`` (always
        re-downloading, since ``force=True``), and finally calls
        ``self.get_data_fpath(path)``.

        Args:
            path (str): Folder to extract the dataset into. Defaults to
                ``self._data_folder`` when None.
            verbose (bool): Print progress messages. Defaults to
                ``self.__verbose`` when None.

        Returns:
            None. The result of ``self.get_data_fpath(path)`` is not returned.
        """
        if verbose is None:
            verbose = self.__verbose

        if verbose:
            print('[i] DOWNLOADING DATA USING KAGGLE API')
            print(
                "\thttps://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset"
            )

        if path is None:
            path = self._data_folder

        import os
        os.makedirs(path, exist_ok=True)

        try:
            import kaggle.api as kaggle  # availability probe only
        except ImportError:
            import importlib
            import subprocess
            import sys

            # Install with the running interpreter's pip so the package ends
            # up in the environment this process imports from.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
                check=False)
            # Make the freshly installed package visible to the import system.
            importlib.invalidate_caches()

            # Clearing notebook output is cosmetic; skip it outside IPython.
            try:
                from IPython.display import clear_output
            except ImportError:
                pass
            else:
                clear_output()
            if verbose:
                print('\t- Installed kaggle command line tool.')

        ## Using the kaggle.api
        import kaggle.api as kaggle
        kaggle.authenticate()
        kaggle.dataset_download_files(
            'sudalairajkumar/novel-corona-virus-2019-dataset',
            path=path,
            force=True,
            unzip=True)

        # Honour the resolved ``verbose`` value so an explicit argument is
        # respected here just like in the messages above.
        if verbose:
            print(f'\t- Downloaded dataset .zip and extracted to:"{path}"')

        self.get_data_fpath(path)
Exemplo n.º 7
0
def main(dataset_dir, force=False):
    """Download the ``datasnaek/youtube-new`` dataset into ``dataset_dir``.

    The directory is created when missing. The download runs only when the
    directory is empty or ``force`` is true.

    Args:
        dataset_dir: Folder that receives the unzipped dataset files.
        force: Download even when ``dataset_dir`` already contains files.
    """
    # makedirs(exist_ok=True) also creates missing parent folders and avoids
    # the race between a separate existence check and mkdir.
    os.makedirs(dataset_dir, exist_ok=True)
    if not os.listdir(dataset_dir) or force:
        api.dataset_download_files(
            "datasnaek/youtube-new",
            path=dataset_dir,
            force=True,
            quiet=False,
            unzip=True,
        )