def _get_run(self, dataset, source, id):
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)
    c.source = Source.by_id(source)
    if c.source is None or c.source.dataset != c.dataset:
        abort(404, _("There is no source '%s'") % source)
    c.run = Run.by_id(id)
    if c.run is None or c.run.source != c.source:
        abort(404, _("There is no run '%s'") % id)
def get_run(dataset, source, id):
    dataset = get_dataset(dataset)
    source = obj_or_404(Source.by_id(source))
    if source.dataset != dataset:
        raise BadRequest("There was no source")
    run = obj_or_404(Run.by_id(id))
    if run.source != source:
        raise BadRequest("There is no run %s" % str(id))
    return dataset, source, run
def get_run(dataset, source, id):
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    source = obj_or_404(Source.by_id(source))
    if source.dataset != dataset:
        raise BadRequest("There was no source")
    run = obj_or_404(Run.by_id(id))
    if run.source != source:
        raise BadRequest("There is no run '%s'" % id)
    return dataset, source, run
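
A minimal sketch of how this helper might be wired into a Flask view; the blueprint name, route and JSON fields below are assumptions for illustration, only get_run() itself comes from the code above.

# Hypothetical usage sketch -- blueprint, route and serialisation are assumed.
from flask import Blueprint, jsonify

blueprint = Blueprint('run', __name__)


@blueprint.route('/datasets/<dataset>/sources/<source>/runs/<id>')
def view(dataset, source, id):
    # Resolve and authorise the dataset, source and run in one step
    dataset, source, run = get_run(dataset, source, id)
    return jsonify({'id': run.id, 'status': run.status})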
def run(self, dry_run=False, max_lines=None, raise_errors=False, **kwargs):
    self.dry_run = dry_run
    self.raise_errors = raise_errors

    # Get unique key for this dataset
    self.key = self._get_unique_key()

    # If this is a dry run we need to check uniqueness
    # Initialize unique check dictionary
    if dry_run:
        self.unique_check = {}

    before_count = len(self.dataset)
    self.row_number = 0

    # If max_lines is set we're doing a sample, not an import
    operation = Run.OPERATION_SAMPLE if dry_run else Run.OPERATION_IMPORT
    self._run = Run(operation, Run.STATUS_RUNNING,
                    self.dataset, self.source)
    db.session.add(self._run)
    db.session.commit()
    log.info("Run reference: #%s", self._run.id)

    try:
        for row_number, line in enumerate(self.lines, start=1):
            if max_lines and row_number >= max_lines:
                break
            self.row_number = row_number
            self.process_line(line)
    except Exception as ex:
        self.log_exception(ex)
        if self.raise_errors:
            self._run.status = Run.STATUS_FAILED
            self._run.time_end = datetime.utcnow()
            db.session.commit()
            raise

    if self.row_number == 0:
        self.log_exception(ValueError("Didn't read any lines of data"),
                           error='')

    num_loaded = len(self.dataset) - before_count
    if not dry_run and not self.errors and \
            num_loaded < (self.row_number - 1):
        self.log_exception(
            ValueError("The number of entries loaded is "
                       "smaller than the number of source rows read."),
            error="%s rows were read, but only %s entries created. "
                  "Check the unique key criteria, entries seem to overlap."
                  % (self.row_number, num_loaded))

    if self.errors:
        self._run.status = Run.STATUS_FAILED
    else:
        self._run.status = Run.STATUS_COMPLETE
        log.info("Finished import with no errors!")

    self._run.time_end = datetime.utcnow()
    self.dataset.updated_at = self._run.time_end
    db.session.commit()
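
A usage sketch for the method above; the importer class and its constructor arguments are hypothetical, only the run() keyword arguments and the errors attribute come from the code above.

# Hypothetical caller -- SomeImporter stands in for whatever concrete importer
# subclass defines self.lines and process_line().
importer = SomeImporter(dataset, source)
importer.run(dry_run=True, max_lines=100)   # sample run: rows are validated, not counted as loaded
if not importer.errors:
    importer.run(raise_errors=True)         # full import, fail fast on the first error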