def download(self, date_range=(None, None), force=False):
    """
    Download 1 or more monthly Reddit comments files from archive.org
    and save them to disk under the ``data_dir`` used to instantiate.

    Args:
        date_range (Tuple[str]): Interval specifying the [start, end) dates
            for which comments files will be downloaded. Each item must be
            a str formatted as YYYY-MM or YYYY-MM-DD (the latter is
            converted to the corresponding YYYY-MM value). Both start and
            end values must be specified, but a null value for either is
            automatically replaced by the minimum or maximum valid values,
            respectively.
        force (bool): If True, download the dataset, even if it already
            exists on disk under ``data_dir``.
    """
    date_range = self._parse_date_range(date_range)
    # one file stub per month in [start, end); each maps to its own URL/path
    for fstub in self._generate_filestubs(date_range):
        url = compat.urljoin(DOWNLOAD_ROOT, fstub)
        filename = os.path.join(self.data_dir, fstub)
        # skip already-downloaded months unless caller forces a re-download
        if os.path.isfile(filename) and not force:
            LOGGER.warning(
                'File %s already exists; skipping download...', filename)
            continue
        LOGGER.info(
            'Downloading data from %s and writing it to %s', url, filename)
        fileio.write_streaming_download_file(
            url, filename, mode='wb', encoding=None,
            auto_make_dirs=True, chunk_size=1024)
def download(self, force=False):
    """
    Download the data as Python version-specific compressed json file and
    save it to disk under the ``data_dir`` directory.

    Args:
        force (bool): If True, download the dataset, even if it already
            exists on disk under ``data_dir``.
    """
    # releases are published per major Python version; pickle/json payloads differ
    release_tag = 'supreme_court_py{py_version}_v{data_version}'.format(
        py_version=2 if compat.is_python2 else 3,
        data_version=1.0)
    url = compat.urljoin(DOWNLOAD_ROOT, release_tag + '/' + self.filestub)
    fname = self._filename
    # don't re-download an existing file unless caller forces it
    if os.path.isfile(fname) and not force:
        LOGGER.warning(
            'File %s already exists; skipping download...', fname)
        return
    LOGGER.info(
        'Downloading data from %s and writing it to %s', url, fname)
    fileio.write_streaming_download_file(
        url, fname, mode='wb', encoding=None,
        auto_make_dirs=True, chunk_size=1024)
def download(self, force=False):
    """
    Download a Python version-specific compressed json file from s3,
    and save it to disk under the ``data_dir`` directory.

    Args:
        force (bool): If True, download the file, even if it already
            exists on disk.
    """
    url = compat.urljoin(DOWNLOAD_ROOT, self.filestub)
    fname = self._filename
    # don't re-download an existing file unless caller forces it
    if os.path.isfile(fname) and not force:
        LOGGER.warning(
            'File %s already exists; skipping download...', fname)
        return
    LOGGER.info(
        'Downloading data from %s and writing it to %s', url, fname)
    fileio.write_streaming_download_file(
        url, fname, mode='wb', encoding=None,
        auto_make_dirs=True, chunk_size=1024)
def download(self, force=False):
    """
    Download the data as a zip archive file and save it to disk under the
    ``data_dir`` directory.

    Args:
        force (bool): If True, download the dataset, even if it already
            exists on disk under ``data_dir``.
    """
    # single fixed archive URL for this dataset, not a per-file stub
    url = DOWNLOAD_ROOT
    fname = self._filename
    # don't re-download an existing archive unless caller forces it
    if os.path.isfile(fname) and not force:
        LOGGER.warning(
            'File %s already exists; skipping download...', fname)
        return
    LOGGER.info(
        'Downloading data from %s and writing it to %s', url, fname)
    fileio.write_streaming_download_file(
        url, fname, mode='wb', encoding=None,
        auto_make_dirs=True, chunk_size=1024)
    # refresh cached metadata now that the archive is on disk
    self._metadata = self._load_and_parse_metadata()
def download(self, force=False):
    """
    Download the data corresponding to the given ``lang`` and ``version``
    as a compressed XML file and save it to disk under the ``data_dir``
    directory.

    Args:
        force (bool): If True, download the dataset, even if it already
            exists on disk under ``data_dir``.
    """
    url = compat.urljoin(DOWNLOAD_ROOT, self.filestub)
    fname = self._filename
    # don't re-download an existing file unless caller forces it
    if os.path.isfile(fname) and not force:
        LOGGER.warning(
            'File %s already exists; skipping download...', fname)
        return
    LOGGER.info(
        'Downloading data from %s and writing it to %s', url, fname)
    fileio.write_streaming_download_file(
        url, fname, mode='wb', encoding=None,
        auto_make_dirs=True, chunk_size=1024)