def extract_mind(train_zip, valid_zip, train_folder="train", valid_folder="valid"):
    """Extract MIND dataset

    Both archives are unpacked into sub-folders of the directory that
    contains ``train_zip``.

    Args:
        train_zip (str): Path to train zip file
        valid_zip (str): Path to valid zip file
        train_folder (str): Destination folder for train set
        valid_folder (str): Destination folder for validation set

    Returns:
        str, str: Train and validation folders
    """
    # Use the directory holding the train archive as the extraction root.
    # os.path.basename would return the archive's *file name*, producing
    # destinations such as "MINDsmall_train.zip/train" relative to the CWD.
    root_folder = os.path.dirname(train_zip)
    train_path = os.path.join(root_folder, train_folder)
    valid_path = os.path.join(root_folder, valid_folder)
    unzip_file(train_zip, train_path)
    unzip_file(valid_zip, valid_path)
    return train_path, valid_path
def extract_mind(
    train_zip,
    valid_zip,
    test_zip=None,
    train_folder="train",
    valid_folder="valid",
    test_folder='test',
    root_folder="",
):
    """Extract MIND dataset

    Each archive is handed to ``unzip_file`` together with a destination
    built by joining ``root_folder`` with the matching folder name.

    Args:
        train_zip (str): Path to train zip file
        valid_zip (str): Path to valid zip file
        test_zip (str): Optional path to test zip file; skipped when None
        train_folder (str): Destination folder for train set
        valid_folder (str): Destination folder for validation set
        test_folder (str): Destination folder for test set
        root_folder (str): Folder that the destination folders are joined onto

    Returns:
        str, str: Train and validation folders when ``test_zip`` is None
        str, str, str: Train, validation and test folders otherwise
    """
    train_dir = os.path.join(root_folder, train_folder)
    valid_dir = os.path.join(root_folder, valid_folder)

    unzip_file(train_zip, train_dir)
    unzip_file(valid_zip, valid_dir)

    # Without a test archive the result stays a two-element tuple.
    if test_zip is None:
        return train_dir, valid_dir

    test_dir = os.path.join(root_folder, test_folder)
    unzip_file(test_zip, test_dir)
    return train_dir, valid_dir, test_dir
def _download_and_extract_globe(dest_path):
    """Fetch the GloVe 6B archive and unzip it under ``dest_path``.

    Args:
        dest_path (str): Working directory passed to ``maybe_download``;
            the archive is extracted into its ``glove`` sub-folder.

    Returns:
        str: Path of the ``glove`` folder inside ``dest_path``
    """
    glove_url = "http://nlp.stanford.edu/data/glove.6B.zip"
    archive = maybe_download(url=glove_url, work_directory=dest_path)

    extract_dir = os.path.join(dest_path, "glove")
    # clean_zip_file=False is forwarded to unzip_file -- presumably so the
    # downloaded archive is kept after extraction (confirm in unzip_file).
    unzip_file(archive, extract_dir, clean_zip_file=False)
    return extract_dir