Пример #1
0
    def set_options(self, *args, **kwargs):
        """
        Configure the paths, verbosity and download prompt for this command.

        Reads the ``test_data``, ``download`` and ``verbosity`` keys from
        ``kwargs``. The data and csv directories are created if missing.
        When ``download`` is truthy, the remote and local metadata are
        fetched and rendered into ``self.prompt``.
        """
        self.url = 'http://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip'

        if kwargs['test_data']:
            self.data_dir = get_test_download_directory()
            # Point the app-wide setting at the test data directory as well
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
        else:
            self.data_dir = get_download_directory()

        # makedirs rather than mkdir so that missing parent directories
        # are created instead of raising an error
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")
        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        if kwargs['download']:
            self.download_metadata = self.get_download_metadata()
            self.local_metadata = self.get_local_metadata()
            prompt_context = dict(
                last_updated=self.download_metadata['last-modified'],
                time_ago=naturaltime(self.download_metadata['last-modified']),
                size=size(self.download_metadata['content-length']),
                last_download=self.local_metadata['last-download'],
                download_dir=self.data_dir,
            )
            self.prompt = render_to_string(
                'calaccess_raw/downloadcalaccessrawdata.txt',
                prompt_context,
            )
        self.verbosity = int(kwargs['verbosity'])
    def handle(self, *args, **options):
        """
        Register the sampled test data as a raw data version and process it.

        Reads the release datetime and size from ``sampled_version.txt`` in
        the test download directory, gets or creates the matching
        ``RawDataVersion``, then runs the unzip, prep and track_files steps.
        The archive step runs only when ``settings.CALACCESS_STORE_ARCHIVE``
        is truthy.
        """
        self.verbosity = options.get("verbosity")
        self.no_color = options.get("no_color")
        self.raw_data_files = RawDataFile.objects
        self.data_dir = get_test_download_directory()
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")
        self.zip_path = os.path.join(self.data_dir, self.url.split('/')[-1])

        version_path = os.path.join(self.data_dir, "sampled_version.txt")
        with open(version_path, "r") as f:
            # readline() keeps the line terminator, so strip it before the
            # values are handed to the ORM
            release_datetime = f.readline().strip()
            expected_size = f.readline().strip()

        try:
            self.version = RawDataVersion.objects.get(
                release_datetime=release_datetime
            )
        except RawDataVersion.DoesNotExist:
            self.version = RawDataVersion.objects.create(
                release_datetime=release_datetime,
                size=expected_size
            )

        self.unzip()
        self.prep()
        self.track_files()

        if getattr(settings, 'CALACCESS_STORE_ARCHIVE', False):
            self.archive()
    def set_options(self, *args, **kwargs):
        """
        Store the command's url, verbosity and working paths on the instance.

        Uses the ``verbosity``, ``test_data`` and ``download`` keys of
        ``kwargs``. Missing data and csv directories are created. When
        ``download`` is truthy, the confirmation prompt is rendered into
        ``self.prompt``.
        """
        self.url = 'http://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip'
        self.verbosity = int(kwargs['verbosity'])

        if not kwargs['test_data']:
            self.data_dir = get_download_directory()
        else:
            self.data_dir = get_test_download_directory()
            # Mirror the test directory into the app-wide setting
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
            if self.verbosity:
                self.log("Using test data")

        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")
        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        # Nothing more to prepare unless a download was requested
        if not kwargs['download']:
            return

        self.download_metadata = self.get_download_metadata()
        self.local_metadata = self.get_local_metadata()
        remote_modified = self.download_metadata['last-modified']
        context = {
            'last_updated': remote_modified,
            'time_ago': naturaltime(remote_modified),
            'size': size(self.download_metadata['content-length']),
            'last_download': self.local_metadata['last-download'],
            'download_dir': self.data_dir,
        }
        self.prompt = render_to_string(
            'calaccess_raw/downloadcalaccessrawdata.txt',
            context,
        )
 def set_config(self, *args, **options):
     """
     Store the source/sample directories and sampling options.

     Reads the ``samplerows`` and ``verbosity`` keys of ``options`` and
     lists the contents of the source tsv directory into ``self.tsv_list``.
     """
     source_root = get_download_directory()
     self.data_dir = source_root
     sample_root = get_test_download_directory()
     self.test_data_dir = sample_root
     # Both trees keep their files in a "tsv/" sub-directory
     self.tsv_dir = os.path.join(source_root, "tsv/")
     self.sample_dir = os.path.join(sample_root, "tsv/")
     self.sample_rows = int(options['samplerows'])
     self.tsv_list = os.listdir(self.tsv_dir)
     self.verbosity = int(options['verbosity'])
 def set_config(self, *args, **options):
     """
     Record where to read source files from and where to write samples.

     ``options`` must provide ``samplerows`` and ``verbosity``; both are
     converted to ``int``. ``self.tsv_list`` is filled with the directory
     listing of the source tsv directory.
     """
     download_dir = get_download_directory()
     self.data_dir = download_dir
     test_dir = get_test_download_directory()
     self.test_data_dir = test_dir
     self.tsv_dir = os.path.join(download_dir, "tsv/")
     self.sample_dir = os.path.join(test_dir, "tsv/")
     self.sample_rows = int(options['samplerows'])
     self.tsv_list = os.listdir(self.tsv_dir)
     self.verbosity = int(options['verbosity'])
    def handle(self, *args, **options):
        """
        Run the download, clean and load steps that have not been skipped.

        With ``test_data`` set, the download step is disabled, files are
        always kept, and the test download directory is used.

        Raises:
            CommandError: if test data is requested but its tsv directory
                does not exist.
        """
        super(Command, self).handle(*args, **options)

        # Attributes shared by several methods of the class
        self.app_name = options["app_name"]
        self.database = options["database"]
        self.keep_files = options["keep_files"]

        using_test_data = options['test_data']

        if using_test_data:
            # Test data needs no download, and its files are always kept
            options["download"] = False
            self.keep_files = True
            self.data_dir = get_test_download_directory()
            # Set app-wide because cleancalaccessrawfile also calls
            #   get_download_directory
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
        else:
            self.data_dir = get_download_directory()

        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.zip_metadata_path = os.path.join(self.data_dir, '.lastdownload')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # Fail fast when the test data tsv directory is missing
        if using_test_data:
            if not os.path.exists(self.tsv_dir):
                raise CommandError(
                    "Data tsv directory does not exist "
                    "at %s" % self.tsv_dir
                )
            if self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        if options['download']:
            download_kwargs = dict(
                keep_files=self.keep_files,
                verbosity=self.verbosity,
                resume=options['resume'],
                noinput=options['noinput'],
            )
            call_command("downloadcalaccessrawdata", **download_kwargs)

        # Remaining steps, each of which can be switched off by its option
        if options['clean']:
            self.clean()
        if options['load']:
            self.load()

        if self.verbosity:
            self.success("Done!")
    def handle(self, *args, **options):
        """
        Write a sample of every source tsv file into the test data directory.

        For each file in the source tsv directory, a sample of
        ``samplerows`` rows is written (after the file's header) to a file
        of the same name in the sample directory. Afterwards the zip is
        saved, and the release datetime and size of the most recently
        started, finished download are written to ``sampled_version.txt``.
        """
        super(Command, self).handle(*args, **options)

        # Resolve directories and options
        self.data_dir = get_download_directory()
        self.test_data_dir = get_test_download_directory()
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")
        self.sample_dir = os.path.join(self.test_data_dir, "tsv/")
        self.sample_rows = int(options['samplerows'])
        self.tsv_list = os.listdir(self.tsv_dir)
        self.verbosity = int(options['verbosity'])

        banner = "Sampling %i rows from %s source files" % (
            self.sample_rows,
            len(self.tsv_list),
        )
        self.header(banner)

        # Start from an existing test data dir and a fresh, empty sample dir
        if not os.path.exists(self.test_data_dir):
            os.makedirs(self.test_data_dir)
        if os.path.exists(self.sample_dir):
            shutil.rmtree(self.sample_dir)
        os.makedirs(self.sample_dir)

        for tsv_name in progress.bar(self.tsv_list):
            source_path = os.path.join(self.tsv_dir, tsv_name)
            target_path = os.path.join(self.sample_dir, tsv_name)

            if self.verbosity > 2:
                self.log(" Sampling %s" % source_path)

            reader = FileInput(source_path, True)
            sampled_rows = two_pass_sample(
                reader,
                sample_size=self.sample_rows
            )

            # Header first, then the sampled rows
            with open(target_path, 'wb') as out:
                out.writelines(chain(reader.header, sampled_rows))

        self.header("Compressing zip file...")
        self.save_zip()

        # Stash the release_datetime and size of the last completed download
        finished_downloads = self.command_logs.filter(
            command='downloadcalaccessrawdata',
            finish_datetime__isnull=False
        )
        version = finished_downloads.order_by('-start_datetime')[0].version

        with open(self.test_data_dir + '/sampled_version.txt', 'w') as out:
            out.write(str(version.release_datetime) + '\n')
            out.write(str(version.size))
Пример #8
0
    def handle(self, *args, **options):
        """
        Run the download, unzip, prep, clear, clean and load steps.

        Each step runs only when its option is truthy. With ``test_data``
        set, the download, unzip, prep and clear steps are disabled and the
        command aborts if the sampled tsv directory is missing. Unless
        ``noinput`` is set, the user must answer ``yes`` at the prompt
        before the download starts.
        """
        if options['test_data']:
            # disable the steps that don't apply to test data
            options["download"] = False
            options["unzip"] = False
            options["prep"] = False
            options["clear"] = False

            self.log("Using test data")

            tsv_dir = os.path.join(get_test_download_directory(), "tsv/")

            # if the directory doesn't exist, abort
            if not os.path.exists(tsv_dir):
                self.failure(
                    "Sampled data tsv directory does not "
                    "exist at %s" % tsv_dir
                )
                return

        # Set the options
        self.set_options(*args, **options)

        # Get to work
        if options['download']:
            if not options['noinput']:
                # Ensure stdout can handle Unicode data: http://bit.ly/1C3l4eV
                locale_encoding = locale.getpreferredencoding()
                old_stdout = sys.stdout
                sys.stdout = codecs.getwriter(locale_encoding)(sys.stdout)
                try:
                    confirm = input(self.prompt)
                finally:
                    # Always put stdout back, even when the prompt is
                    # interrupted (e.g. Ctrl-C or end of input)
                    sys.stdout = old_stdout

                if confirm != 'yes':
                    self.failure("Download cancelled")
                    return
            self.download()
        if options['unzip']:
            self.unzip()
        if options['prep']:
            self.prep()
        if options['clear']:
            self.clear()
        if options['clean']:
            self.clean()
        if options['load']:
            self.load()
        self.success("Done!")
    def handle(self, *args, **options):
        """
        Execute every enabled step of the command in order.

        The steps are download, unzip, prep, clear, clean and load; each is
        controlled by the option of the same name. ``test_data`` switches
        off the first four and aborts when the sampled tsv directory does
        not exist. The download is confirmed interactively unless
        ``noinput`` is set.
        """
        if options['test_data']:
            # disable the steps that don't apply to test data
            for skipped_step in ("download", "unzip", "prep", "clear"):
                options[skipped_step] = False

            self.log("Using test data")

            tsv_dir = os.path.join(get_test_download_directory(), "tsv/")

            # if the directory doesn't exist, abort
            if not os.path.exists(tsv_dir):
                self.failure(
                    "Sampled data tsv directory does not "
                    "exist at %s" % tsv_dir
                )
                return

        # Set the options
        self.set_options(*args, **options)

        # Get to work
        if options['download']:
            if not options['noinput']:
                # Ensure stdout can handle Unicode data: http://bit.ly/1C3l4eV
                locale_encoding = locale.getpreferredencoding()
                old_stdout = sys.stdout
                sys.stdout = codecs.getwriter(locale_encoding)(sys.stdout)
                try:
                    confirm = input(self.prompt)
                finally:
                    # Restore stdout no matter how the prompt ends, so an
                    # interrupted prompt does not leave it wrapped
                    sys.stdout = old_stdout

                if confirm != 'yes':
                    self.failure("Download cancelled")
                    return
            self.download()
        if options['unzip']:
            self.unzip()
        if options['prep']:
            self.prep()
        if options['clear']:
            self.clear()
        if options['clean']:
            self.clean()
        if options['load']:
            self.load()
        self.success("Done!")
    def handle(self, *args, **options):
        """
        Update the raw data: decide whether to resume, then download,
        clean and load.

        Reads the ``app_name``, ``keep_files``, ``test_data``, ``download``,
        ``clean``, ``load`` and ``noinput`` options. Compares the remote
        release datetime with the last started update to work out whether
        the data is up to date or an unfinished update can be resumed.
        Unless ``noinput`` is set, the user is asked to confirm.

        Raises:
            CommandError: if the test data tsv directory is missing, or the
                user declines to proceed.
        """
        super(Command, self).handle(*args, **options)

        # set / compute any attributes that multiple class methods need
        self.app_name = options["app_name"]
        self.keep_files = options["keep_files"]
        self.test_mode = options['test_data']
        self.downloading = options['download']
        self.cleaning = options['clean']
        self.loading = options['load']

        if self.test_mode:
            # if using test data, we don't need to download
            self.downloading = False
            # and always keep files when running test data
            self.keep_files = True
            self.data_dir = get_test_download_directory()
            # need to set this app-wide because cleancalaccessrawfile
            #   also calls get_download_directory
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
        else:
            self.data_dir = get_download_directory()

        os.path.exists(self.data_dir) or os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # Immediately check that the tsv directory exists when using test data,
        #   so we can stop immediately.
        if self.test_mode:
            if not os.path.exists(self.tsv_dir):
                raise CommandError("Data tsv directory does not exist "
                                   "at %s" % self.tsv_dir)
            elif self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        os.path.exists(self.csv_dir) or os.makedirs(self.csv_dir)

        # NOTE(review): the download metadata is fetched in test mode too
        download_metadata = self.get_download_metadata()
        current_release_datetime = download_metadata['last-modified']
        last_started_update = self.get_last_log()

        # most recently started download, or None if there has never been one
        try:
            last_download = self.command_logs.filter(
                command='downloadcalaccessrawdata').order_by(
                    '-start_datetime')[0]
        except IndexError:
            last_download = None

        up_to_date = False
        can_resume = False

        # if there's a previously started update
        if last_started_update:
            # if current release datetime matches version of last started update
            if current_release_datetime == last_started_update.version.release_datetime:
                # if the last update finished
                if last_started_update.finish_datetime:
                    up_to_date = True
                else:
                    # if the last update didn't finish
                    # (but is still for the current version)
                    can_resume = True
            # if the last started update didn't finish
            elif not last_started_update.finish_datetime:
                # can resume update of old version as long as skipping download
                if not self.downloading:
                    can_resume = True
                # or if there is a last download
                elif last_download:
                    # and last download's version matches the outstanding update version
                    if last_download.version == last_started_update.version:
                        # and last download completed
                        if last_download.finish_datetime:
                            can_resume = True

        if options['noinput']:
            # if not taking input and can resume, automatically go into resume mode
            self.resume_mode = can_resume
        else:
            prompt_context = dict(
                current_release_datetime=current_release_datetime,
                expected_size=size(download_metadata['content-length']),
                up_to_date=up_to_date,
                can_resume=can_resume,
            )

            last_finished_update = self.get_last_log(finished=True)

            if last_finished_update:
                loaded_v = last_finished_update.version
                prompt_context['since_loaded_version'] = naturaltime(
                    loaded_v.release_datetime)
            else:
                prompt_context['since_loaded_version'] = None

            prompt = render_to_string(
                'calaccess_raw/updatecalaccessrawdata.txt',
                prompt_context,
            )

            if can_resume:
                # first ask about resuming; if declined, ask about restarting
                if self.confirm_proceed(prompt):
                    self.resume_mode = True
                else:
                    self.resume_mode = False
                    if not self.confirm_proceed(
                            'Do you want re-start your update?\n'):
                        raise CommandError("Update cancelled")
            else:
                self.resume_mode = False
                if not self.confirm_proceed(prompt):
                    raise CommandError("Update cancelled")

        # self.log_record is only assigned here when not running in test mode
        if not self.test_mode:
            if self.resume_mode:
                self.log_record = last_started_update
            else:
                # get or create a version
                # .get_or_create() throws IntegrityError
                try:
                    version = self.raw_data_versions.get(
                        release_datetime=current_release_datetime)
                except RawDataVersion.DoesNotExist:
                    version = self.raw_data_versions.create(
                        release_datetime=current_release_datetime,
                        size=download_metadata['content-length'])
                # create a new log record
                self.log_record = self.command_logs.create(
                    version=version,
                    command=self,
                    called_by=self.get_caller_log())

        # if the user could have resumed but didn't
        force_restart_download = can_resume and not self.resume_mode

        # if not skipping download, and there's a previous download
        # (self.downloading is always False in test mode, so self.log_record
        #  is not read below in that case)
        if self.downloading and last_download:
            # if not forcing a restart
            if not force_restart_download:
                # check if version we are updating is last one being downloaded
                if self.log_record.version == last_download.version:
                    # if it finished
                    if last_download.finish_datetime:
                        self.log('Already downloaded.')
                        self.downloading = False

        if self.downloading:
            call_command(
                "downloadcalaccessrawdata",
                keep_files=self.keep_files,
                verbosity=self.verbosity,
                noinput=True,
                restart=force_restart_download,
            )
            if self.verbosity:
                self.duration()

        # execute the other steps that haven't been skipped
        if options['clean']:
            self.clean()
            if self.verbosity:
                self.duration()

        if options['load']:
            self.load()
            if self.verbosity:
                self.duration()

        if self.verbosity:
            self.success("Done!")

        # mark the update as finished (no log record exists in test mode)
        # NOTE(review): presumably datetime.now() yields a naive datetime
        #   here; confirm whether a timezone-aware value is expected
        if not self.test_mode:
            self.log_record.finish_datetime = datetime.now()
            self.log_record.save()
    def set_options(self, *args, **kwargs):
        """
        Store the command's settings and, for downloads, build the prompt.

        Uses the ``verbosity``, ``database``, ``test_data``, ``download``
        and ``resume-download`` keys of ``kwargs``. Missing data and csv
        directories are created.

        Raises:
            CommandError: if test data is requested but its tsv directory
                does not exist.
        """
        self.url = 'http://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip'
        self.verbosity = int(kwargs['verbosity'])
        self.database = kwargs['database']

        use_test_data = kwargs['test_data']

        if not use_test_data:
            self.data_dir = get_download_directory()
        else:
            self.data_dir = get_test_download_directory()
            # Mirror the test directory into the app-wide setting
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir

        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.zip_metadata_path = os.path.join(self.data_dir, '.lastdownload')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # Fail fast when the test data tsv directory is missing
        if use_test_data:
            if not os.path.exists(self.tsv_dir):
                raise CommandError(
                    "Data tsv directory does not exist "
                    "at %s" % self.tsv_dir
                )
            if self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        # The rest only matters when a download was requested
        if not kwargs['download']:
            return

        self.download_metadata = self.get_download_metadata()
        self.local_metadata = self.get_local_metadata()

        remote_size = self.download_metadata['content-length']
        remote_modified = self.download_metadata['last-modified']
        previous_download = self.local_metadata['last-download']
        bytes_on_disk = 0

        self.resume_download = (
            kwargs['resume-download'] and os.path.exists(self.zip_path)
        )

        if self.resume_download:
            # Only resume when the partial file is newer than the last
            # update to the remote data.
            partial_mtime = os.path.getmtime(self.zip_path)
            partial_datetime = datetime.fromtimestamp(partial_mtime, utc)
            self.resume_download = partial_datetime > remote_modified
            if self.resume_download:
                previous_download = partial_datetime
                bytes_on_disk = os.path.getsize(self.zip_path)

        context = {
            'resuming': self.resume_download,
            'already_downloaded': remote_modified == previous_download,
            'last_modified': remote_modified,
            'last_download': previous_download,
            'time_ago': naturaltime(previous_download),
            'total_size': size(remote_size),
            'cur_size': size(bytes_on_disk),
            'download_dir': self.data_dir,
        }

        self.prompt = render_to_string(
            'calaccess_raw/downloadcalaccessrawdata.txt',
            context,
        )
Пример #12
0
    def set_options(self, *args, **kwargs):
        """
        Set up paths and verbosity, and render the download prompt if needed.

        Uses the ``verbosity``, ``test_data``, ``download`` and
        ``resume-download`` keys of ``kwargs``. Missing data and csv
        directories are created.

        Raises:
            CommandError: if test data is requested but its tsv directory
                does not exist.
        """
        self.url = 'http://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip'
        self.verbosity = int(kwargs['verbosity'])

        test_data_requested = kwargs['test_data']

        if not test_data_requested:
            self.data_dir = get_download_directory()
        else:
            self.data_dir = get_test_download_directory()
            # Mirror the test directory into the app-wide setting
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir

        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # Stop right away when the test data tsv directory is missing
        if test_data_requested:
            if not os.path.exists(self.tsv_dir):
                raise CommandError(
                    "Data tsv directory does not exist "
                    "at %s" % self.tsv_dir
                )
            if self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        # Everything below only prepares the download prompt
        if not kwargs['download']:
            return

        self.download_metadata = self.get_download_metadata()
        self.local_metadata = self.get_local_metadata()

        expected_bytes = self.download_metadata['content-length']
        modified_at = self.download_metadata['last-modified']
        downloaded_at = self.local_metadata['last-download']
        current_bytes = 0

        self.resume_download = (
            kwargs['resume-download'] and os.path.exists(self.zip_path)
        )

        if self.resume_download:
            # The partial file must be newer than the last update to the
            # remote data for a resume to be allowed.
            zip_mtime = os.path.getmtime(self.zip_path)
            zip_datetime = datetime.fromtimestamp(zip_mtime, utc)
            self.resume_download = zip_datetime > modified_at
            if self.resume_download:
                downloaded_at = zip_datetime
                current_bytes = os.path.getsize(self.zip_path)

        context = {
            'resuming': self.resume_download,
            'already_downloaded': modified_at == downloaded_at,
            'last_modified': modified_at,
            'last_download': downloaded_at,
            'time_ago': naturaltime(downloaded_at),
            'total_size': size(expected_bytes),
            'cur_size': size(current_bytes),
            'download_dir': self.data_dir,
        }

        self.prompt = render_to_string(
            'calaccess_raw/downloadcalaccessrawdata.txt',
            context,
        )
    def handle(self, *args, **options):
        """
        Update the raw data, from either the live download or the test data.

        Reads the ``app_name``, ``keep_files``, ``test_data``, ``download``,
        ``clean``, ``load`` and ``noinput`` options. In test mode the
        release datetime and size come from ``sampled_version.txt`` and user
        input is disabled; otherwise they come from the download metadata.
        The method then decides whether an unfinished update can be resumed,
        creates or reuses a log record, and runs the enabled steps.

        Raises:
            CommandError: if the test data tsv directory is missing, or the
                user declines to proceed.
        """
        super(Command, self).handle(*args, **options)

        # set / compute any attributes that multiple class methods need
        self.app_name = options["app_name"]
        self.keep_files = options["keep_files"]
        self.test_mode = options['test_data']
        self.downloading = options['download']
        self.cleaning = options['clean']
        self.loading = options['load']
        self.noinput = options['noinput']

        if self.test_mode:
            # and always keep files when running test data
            self.keep_files = True
            self.data_dir = get_test_download_directory()
            # need to set this app-wide because cleancalaccessrawfile
            #   also calls get_download_directory
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
            # never prompt when running against test data
            self.noinput = True
        else:
            self.data_dir = get_download_directory()

        os.path.exists(self.data_dir) or os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # Immediately check that the tsv directory exists when using test data,
        #   so we can stop immediately.
        if self.test_mode:
            if not os.path.exists(self.tsv_dir):
                raise CommandError("Data tsv directory does not exist "
                                   "at %s" % self.tsv_dir)
            elif self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        os.path.exists(self.csv_dir) or os.makedirs(self.csv_dir)

        if self.test_mode:
            # first line: release datetime, second line: size
            # NOTE(review): readline() returns str and keeps any trailing
            #   newline; the value is later compared with a stored
            #   release_datetime, so confirm that comparison can ever match
            with open(self.data_dir + "/sampled_version.txt", "r") as f:
                current_release_datetime = f.readline()
                expected_size = f.readline()
        else:
            download_metadata = self.get_download_metadata()
            current_release_datetime = download_metadata['last-modified']
            expected_size = download_metadata['content-length']

        last_started_update = self.get_last_log()

        if self.test_mode:
            last_download = None
        else:
            # most recently started download, or None if there are none
            try:
                last_download = self.command_logs.filter(
                    command='downloadcalaccessrawdata'
                ).order_by('-start_datetime')[0]
            except IndexError:
                last_download = None

        up_to_date = False
        can_resume = False

        # if there's a previously started update
        if last_started_update:
            # if current release datetime matches version of last started update
            if current_release_datetime == last_started_update.version.release_datetime:
                # if the last update finished
                if last_started_update.finish_datetime:
                    up_to_date = True
                else:
                    # if the last update didn't finish
                    # (but is still for the current version)
                    can_resume = True
            # if the last started update didn't finish
            elif not last_started_update.finish_datetime:
                # can resume update of old version as long as skipping download
                if not self.downloading:
                    can_resume = True
                # or if there is a last download
                elif last_download:
                    # and last download's version matches the outstanding update version
                    if last_download.version == last_started_update.version:
                        # and last download completed
                        if last_download.finish_datetime:
                            can_resume = True

        if self.noinput:
            # if not taking input and can resume, automatically go into resume mode
            self.resume_mode = can_resume
        else:
            prompt_context = dict(
                current_release_datetime=current_release_datetime,
                expected_size=size(expected_size),
                up_to_date=up_to_date,
                can_resume=can_resume,
            )

            last_finished_update = self.get_last_log(finished=True)

            if last_finished_update:
                loaded_v = last_finished_update.version
                prompt_context['since_loaded_version'] = naturaltime(loaded_v.release_datetime)
            else:
                prompt_context['since_loaded_version'] = None

            prompt = render_to_string(
                'calaccess_raw/updatecalaccessrawdata.txt',
                prompt_context,
            )

            if can_resume:
                # first ask about resuming; if declined, ask about restarting
                if self.confirm_proceed(prompt):
                    self.resume_mode = True
                else:
                    self.resume_mode = False
                    if not self.confirm_proceed('Do you want re-start your update?\n'):
                        raise CommandError("Update cancelled")
            else:
                self.resume_mode = False
                if not self.confirm_proceed(prompt):
                    raise CommandError("Update cancelled")

        if self.resume_mode:
            # continue logging against the unfinished update
            self.log_record = last_started_update
        else:
            # get or create a version
            # .get_or_create() throws IntegrityError
            try:
                version = self.raw_data_versions.get(
                    release_datetime=current_release_datetime
                )
            except RawDataVersion.DoesNotExist:
                version = self.raw_data_versions.create(
                    release_datetime=current_release_datetime,
                    size=expected_size
                )
            # create a new log record
            self.log_record = self.command_logs.create(
                version=version,
                command=self,
                called_by=self.get_caller_log()
            )

        # if the user could have resumed but didn't
        force_restart_download = can_resume and not self.resume_mode

        # if not skipping download, and there's a previous download
        if self.downloading and last_download:
            # if not forcing a restart
            if not force_restart_download:
                # check if version we are updating is last one being downloaded
                if self.log_record.version == last_download.version:
                    # if it finished
                    if last_download.finish_datetime:
                        self.log('Already downloaded.')
                        self.downloading = False

        if self.downloading:
            # test mode uses its own download command
            if self.test_mode:
                call_command(
                    "downloadcalaccessrawdatatest",
                    verbosity=self.verbosity,
                )
            else:
                call_command(
                    "downloadcalaccessrawdata",
                    keep_files=self.keep_files,
                    verbosity=self.verbosity,
                    noinput=True,
                    restart=force_restart_download,
                )
            if self.verbosity:
                self.duration()

        # execute the other steps that haven't been skipped
        if options['clean']:
            self.clean()
            if self.verbosity:
                self.duration()

        if options['load']:
            self.load()
            if self.verbosity:
                self.duration()

        if self.verbosity:
            self.success("Done!")

        # mark the update as finished
        self.log_record.finish_datetime = now()
        self.log_record.save()
    def handle(self, *args, **options):
        """
        Run the update: optionally download, then clean and load the data.

        Reads the ``app_name``, ``keep_files``, ``test_data``, ``download``,
        ``noinput``, ``clean`` and ``load`` options. Raises ``CommandError``
        when test data is requested but its tsv directory does not exist.
        """
        super(Command, self).handle(*args, **options)

        using_test_data = options['test_data']

        # attributes shared by several methods of the class
        self.app_name = options["app_name"]
        self.keep_files = options["keep_files"]

        if using_test_data:
            # no download is needed for test data, and its files are
            #   always kept
            options['download'] = False
            self.keep_files = True
            self.data_dir = get_test_download_directory()
            # set app-wide, because cleancalaccessrawfile also calls
            #   get_download_directory
            settings.CALACCESS_DOWNLOAD_DIR = self.data_dir
        else:
            self.data_dir = get_download_directory()

        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
        self.tsv_dir = os.path.join(self.data_dir, "tsv/")

        # With test data, fail right away if the tsv directory is missing,
        #   before anything else is created.
        if using_test_data:
            if not os.path.exists(self.tsv_dir):
                raise CommandError(
                    "Data tsv directory does not exist at %s" % self.tsv_dir
                )
            if self.verbosity:
                self.log("Using test data")

        self.csv_dir = os.path.join(self.data_dir, "csv/")
        if not os.path.exists(self.csv_dir):
            os.makedirs(self.csv_dir)

        metadata = self.get_download_metadata()
        self.current_release_datetime = metadata['last-modified']
        self.last_update = self.get_last_log()
        self.resume_download = self.check_can_resume_download()
        self.log_record = None

        # log records are only kept for real (non-test) updates
        if not using_test_data:
            previous = self.last_update
            # resume when a previous update exists, never finished, and the
            #   download can either be resumed or is being skipped altogether
            if (
                previous
                and not previous.finish_datetime
                and (self.resume_download or not options['download'])
            ):
                self.log_record = previous
            else:
                # get or create a version
                # .get_or_create() throws IntegrityError
                versions = self.raw_data_versions
                try:
                    version = versions.get(
                        release_datetime=self.current_release_datetime
                    )
                except RawDataVersion.DoesNotExist:
                    version = versions.create(
                        release_datetime=self.current_release_datetime,
                        size=metadata['content-length']
                    )
                # start a fresh log record for this run
                self.log_record = self.command_logs.create(
                    version=version,
                    command=self,
                    called_by=self.get_caller()
                )

        if options['download']:
            call_command(
                "downloadcalaccessrawdata",
                keep_files=self.keep_files,
                verbosity=self.verbosity,
                resume=self.resume_download,
                noinput=options['noinput'],
            )
            if self.verbosity:
                self.duration()

        # run the remaining steps, in order, unless they were skipped;
        #   each option name matches the method that performs the step
        for step_name in ('clean', 'load'):
            if options[step_name]:
                getattr(self, step_name)()
                if self.verbosity:
                    self.duration()

        if self.verbosity:
            self.success("Done!")

        # mark the logged update as finished
        if not using_test_data:
            self.log_record.finish_datetime = datetime.now()
            self.log_record.save()