示例#1
0
    def download(self, date_range=(None, None), force=False):
        """
        Download 1 or more monthly Reddit comments files from archive.org
        and save them to disk under the ``data_dir`` used to instantiate.

        Args:
            date_range (Tuple[str]): Interval specifying the [start, end) dates
                for which comments files will be downloaded. Each item must be
                a str formatted as YYYY-MM or YYYY-MM-DD (the latter is converted
                to the corresponding YYYY-MM value). Both start and end values
                must be specified, but a null value for either is automatically
                replaced by the minimum or maximum valid values, respectively.
            force (bool): If truthy, download each file even if it already
                exists on disk under ``data_dir``. Otherwise, files that are
                already present are skipped with a logged warning.
        """
        date_range = self._parse_date_range(date_range)
        for fstub in self._generate_filestubs(date_range):
            url = compat.urljoin(DOWNLOAD_ROOT, fstub)
            filename = os.path.join(self.data_dir, fstub)
            # Test truthiness rather than identity with ``False``, so that
            # falsy non-bool values (e.g. 0 or None) also skip existing files
            # instead of silently re-downloading them.
            if os.path.isfile(filename) and not force:
                LOGGER.warning('File %s already exists; skipping download...',
                               filename)
                continue
            LOGGER.info('Downloading data from %s and writing it to %s', url,
                        filename)
            fileio.write_streaming_download_file(
                url, filename, mode='wb', encoding=None,
                auto_make_dirs=True, chunk_size=1024)
示例#2
0
    def download(self, force=False):
        """
        Download the data as Python version-specific compressed json file and
        save it to disk under the ``data_dir`` directory.

        The file is written to ``self._filename``; if a file already exists
        at that path and ``force`` is falsy, nothing is downloaded and a
        warning is logged instead.

        Args:
            force (bool): If truthy, download the dataset, even if it already
                exists on disk under ``data_dir``.
        """
        # The release tag encodes the running Python major version (2 or 3)
        # and the data version; it forms the URL path segment before the
        # file stub.
        release_tag = 'supreme_court_py{py_version}_v{data_version}'.format(
            py_version=2 if compat.is_python2 else 3, data_version=1.0)
        url = compat.urljoin(DOWNLOAD_ROOT, release_tag + '/' + self.filestub)
        fname = self._filename
        # Test truthiness rather than identity with ``False``, so that falsy
        # non-bool values (e.g. 0 or None) also skip an existing file.
        if os.path.isfile(fname) and not force:
            LOGGER.warning('File %s already exists; skipping download...',
                           fname)
            return
        LOGGER.info('Downloading data from %s and writing it to %s', url,
                    fname)
        fileio.write_streaming_download_file(
            url, fname, mode='wb', encoding=None,
            auto_make_dirs=True, chunk_size=1024)
示例#3
0
    def download(self, force=False):
        """
        Download a Python version-specific compressed json file from s3,
        and save it to disk under the ``data_dir`` directory.

        The file is written to ``self._filename``; if a file already exists
        at that path and ``force`` is falsy, nothing is downloaded and a
        warning is logged instead.

        Args:
            force (bool): If truthy, download the file, even if it already
                exists on disk.
        """
        url = compat.urljoin(DOWNLOAD_ROOT, self.filestub)
        fname = self._filename
        # Test truthiness rather than identity with ``False``, so that falsy
        # non-bool values (e.g. 0 or None) also skip an existing file.
        if os.path.isfile(fname) and not force:
            LOGGER.warning(
                'File %s already exists; skipping download...', fname)
            return
        LOGGER.info(
            'Downloading data from %s and writing it to %s', url, fname)
        fileio.write_streaming_download_file(
            url, fname, mode='wb', encoding=None,
            auto_make_dirs=True, chunk_size=1024)
示例#4
0
    def download(self, force=False):
        """
        Download the data as a zip archive file and save it to disk under the
        ``data_dir`` directory.

        The archive is fetched from ``DOWNLOAD_ROOT`` and written to
        ``self._filename``. After a download actually takes place, the
        metadata is re-read via ``self._load_and_parse_metadata()`` and stored
        in ``self._metadata``; when the download is skipped, ``self._metadata``
        is left untouched.

        Args:
            force (bool): If truthy, download the dataset, even if it already
                exists on disk under ``data_dir``.
        """
        url = DOWNLOAD_ROOT
        fname = self._filename
        # Test truthiness rather than identity with ``False``, so that falsy
        # non-bool values (e.g. 0 or None) also skip an existing file.
        if os.path.isfile(fname) and not force:
            LOGGER.warning(
                'File %s already exists; skipping download...', fname)
            return
        LOGGER.info(
            'Downloading data from %s and writing it to %s', url, fname)
        fileio.write_streaming_download_file(
            url, fname, mode='wb', encoding=None,
            auto_make_dirs=True, chunk_size=1024)
        # Refresh the metadata only after new data has been written.
        self._metadata = self._load_and_parse_metadata()
示例#5
0
    def download(self, force=False):
        """
        Download the data corresponding to the given ``lang`` and ``version``
        as a compressed XML file and save it to disk under the ``data_dir`` directory.

        The file is written to ``self._filename``; if a file already exists
        at that path and ``force`` is falsy, nothing is downloaded and a
        warning is logged instead.

        Args:
            force (bool): If truthy, download the dataset, even if it already
                exists on disk under ``data_dir``.
        """
        url = compat.urljoin(DOWNLOAD_ROOT, self.filestub)
        fname = self._filename
        # Test truthiness rather than identity with ``False``, so that falsy
        # non-bool values (e.g. 0 or None) also skip an existing file.
        if os.path.isfile(fname) and not force:
            LOGGER.warning('File %s already exists; skipping download...',
                           fname)
            return
        LOGGER.info('Downloading data from %s and writing it to %s', url,
                    fname)
        fileio.write_streaming_download_file(
            url, fname, mode='wb', encoding=None,
            auto_make_dirs=True, chunk_size=1024)