Пример #1
0
    def do_import(self, where: ImportWhere, how: ImportHow, rowcount: int, report_def: Callable) -> int:
        """
            Import every file contained in this bundle.

            Before delegating to the per-file import, what the project already holds (parents,
            objects, image ranks) is read using the writer's session and stored onto `how`.
            :param where: Import destination, its `db_writer.session` is borrowed here.
            :param how: Import directives; `existing_parents`, `existing_objects` and
                        `image_ranks_per_obj` are set by this method.
            :param rowcount: Total rowcount, from preparation step.
            :param report_def: A def to call at certain points for reporting progress.
            :return: The value returned by `import_each_file`, i.e. the total number of rows.
        """
        random.seed()
        stats = ImportStats(rowcount, report_def)
        # The session is not ours, it belongs to the writer
        db_session = where.db_writer.session
        project_id = how.prj_id

        # Parents (enclosing Sample, Acquisition, Process), if any
        how.existing_parents = self.fetch_existing_parents(db_session, prj_id=project_id)
        for alias in GlobalMapping.PARENT_CLASSES.keys():
            pk_per_orig_id = {}
            for parent in how.existing_parents[alias].values():
                pk_per_orig_id[parent.orig_id] = parent.pk()
            logger.info("existing %s = %s", alias, pk_per_orig_id)

        # Already created objects (unicity from object_id in TSV, orig_id in model)
        how.existing_objects = self.fetch_existing_objects(db_session, prj_id=project_id)
        # Already stored images (unicity for object ID + rank)
        how.image_ranks_per_obj = self.fetch_existing_ranks(db_session, prj_id=project_id)

        return self.import_each_file(where, how, stats)
Пример #2
0
 def _collect_existing_and_validate(self, source_dir_or_zip, loaded_files) \
         -> Tuple[ImportHow, ImportDiagnostic, int]:
     """
         Prepare the import: look at what the project contains already, then validate the input files.
         :param source_dir_or_zip: Source location, handed over to InBundle.
         :param loaded_files: Previously loaded files, handed over to ImportHow.
         :return: The import directives, the validation diagnostic and the row count
                  returned by the bundle validation.
     """
     # TSV custom columns mapping: empty, or left by previous imports into the same project.
     project_mapping = ProjectMapping().load_from_project(self.prj)
     # Build the source bundle, using the job's temporary directory
     work_dir = Path(self.temp_for_jobs.data_dir_for(self.job_id))
     bundle = InBundle(source_dir_or_zip, work_dir)
     # Directives for the validation to come
     req = self.req
     directives = ImportHow(self.prj_id,
                            req.update_mode,
                            project_mapping,
                            req.skip_existing_objects,
                            loaded_files)
     if req.skip_loaded_files:
         directives.compute_skipped(bundle, logger)
     # Validation results get collected here
     diagnostic = ImportDiagnostic()
     if not req.skip_existing_objects:
         timer_label = "collect_existing: Existing images for %d: " % self.prj_id
         with CodeTimer(timer_label, logger):
             existing = Image.fetch_existing_images(self.session, self.prj_id)
             diagnostic.existing_objects_and_image = existing
     diagnostic.topology.read_from_db(self.session, prj_id=self.prj_id)
     # The bulk of the validation work
     row_count = bundle.validate_import(directives,
                                        diagnostic,
                                        self.session,
                                        self.report_validation_progress)
     return directives, diagnostic, row_count
Пример #3
0
    def _do_clone(self):
        """
            The cloning itself: copy into the destination project, via the import helpers,
            then write the source project's mappings to the destination.
            Assumes that @see self.to_clone was populated before.
        """
        # Mappings of the source project tell which columns are really in use
        source_mapping = ProjectMapping().load_from_project(self.prj)
        real_cols_to_tsv = source_mapping.object_mappings.real_cols_to_tsv
        columns_in_use = set(real_cols_to_tsv.keys())
        columns_in_use.add("orig_id")  # By safety

        # The DB writer only needs to write the used ObjectFields columns
        db_writer = DBWriter(self.session)
        db_writer.generators({"obj_field": columns_in_use})

        # Directives for the import helpers, targeting the destination project
        target_prj_id = self.dest_prj.projid
        directives = ImportHow(
            prj_id=target_prj_id,
            update_mode="No",
            custom_mapping=ProjectMapping(),
            skip_object_duplicates=False,
            loaded_files=[],
        )
        # Parents (enclosing Sample, Acquisition, Process). There should be 0 in this context...
        directives.existing_parents = InBundle.fetch_existing_parents(self.session,
                                                                      prj_id=target_prj_id)

        self._clone_all(directives, db_writer)
        # Mappings go to destination as well. We could narrow them to the minimum?
        source_mapping.write_to_project(self.dest_prj)
Пример #4
0
 def do_intra_step_1(self, loaded_files):
     """
         Build the source bundle and the import directives, then validate the bundle.
         :param loaded_files: Previously loaded files, handed over to ImportHow.
         :return: The import directives, the validation diagnostic and the row count
                  returned by the bundle validation.
     """
     # Custom columns mapping: empty, or left by previous import operations on the same project.
     project_mapping = ProjectMapping().load_from_project(self.prj)
     # Build the source bundle, using the task's temporary directory
     work_dir = Path(self.temp_for_task.data_dir_for(self.task_id))
     bundle = InBundle(self.source_dir_or_zip, work_dir)
     # Directives for the validation to come
     req = self.req
     directives = ImportHow(self.prj_id,
                            req.update_mode,
                            project_mapping,
                            req.skip_existing_objects,
                            loaded_files)
     if req.skip_loaded_files:
         directives.compute_skipped(bundle, logger)
     # Validation results get collected here
     diagnostic = ImportDiagnostic()
     if not req.skip_existing_objects:
         timer_label = "do_intra_step_1: Existing images for %d: " % self.prj_id
         with CodeTimer(timer_label, logger):
             existing = Image.fetch_existing_images(self.session, self.prj_id)
             diagnostic.existing_objects_and_image = existing
     diagnostic.topology.read_from_db(self.session, prj_id=self.prj_id)
     # The bulk of the validation work
     row_count = bundle.validate_import(directives,
                                        diagnostic,
                                        self.session,
                                        self.report_progress)
     return directives, diagnostic, row_count
Пример #5
0
 def before_import(self, how: ImportHow):
     """
         Set up vignette generation on the directives, when a configuration file is present.
         :param how: Import directives. Its `vignette_maker` is first reset to None, then set
                     to a VignetteMaker if the file `self.path / self.VIGNETTE_CONFIG` exists.
     """
     # Reset first, so that None is the outcome whenever no maker gets built below
     how.vignette_maker = None
     # The vignette-ing config file is looked for in the bundle directory
     config_path = self.path / self.VIGNETTE_CONFIG
     if not config_path.exists():
         return
     parser = configparser.ConfigParser()
     parser.read(config_path.as_posix())
     how.vignette_maker = VignetteMaker(parser, self.path, self.TEMP_VIGNETTE)
Пример #6
0
    def do_run(self, current_user_id: int) -> ImportRealRsp:
        """
            Do the real job using injected parameters: check rights, save the mapping, import
            the source bundle, then update the loaded files list and the statistics of the project.
            :param current_user_id: The user asking for the import. RightsBO.user_wants is
                                    called for Action.ADMINISTRATE on the project.
            :return: A default-constructed ImportRealRsp.
        """
        # Security check
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE,
                            self.prj_id)
        # OK
        loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
        logger.info("Previously loaded files: %s", loaded_files)

        # Save mappings straight away
        self.save_mapping(self.custom_mapping)

        source_bundle = InBundle(
            self.req.source_path,
            Path(self.temp_for_task.data_dir_for(self.task_id)))
        # Configure the import to come, destination
        db_writer = DBWriter(self.session)
        import_where = ImportWhere(
            db_writer, self.vault,
            self.temp_for_task.base_dir_for(self.task_id))
        # Configure the import to come, directives
        import_how = ImportHow(self.prj_id, self.req.update_mode,
                               self.custom_mapping,
                               self.req.skip_existing_objects, loaded_files)
        import_how.taxo_mapping = self.req.taxo_mappings
        import_how.taxo_found = self.req.found_taxa
        import_how.found_users = self.req.found_users
        if self.req.skip_loaded_files:
            import_how.compute_skipped(source_bundle, logger)
        if self.req.skip_existing_objects:
            # If we must skip existing objects then do an inventory of what's in already.
            # The condition used to be negated, so the inventory was only built when
            # nothing had to be skipped and was missing when the option was set.
            with CodeTimer("run: Existing images for %d: " % self.prj_id,
                           logger):
                import_how.objects_and_images_to_skip = Image.fetch_existing_images(
                    self.session, self.prj_id)
        import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

        # Do the bulk job of import
        row_count = source_bundle.do_import(import_where, import_how,
                                            self.req.rowcount,
                                            self.report_progress)

        # Update loaded files in DB, removing duplicates. dict.fromkeys() keeps the first
        # occurrence of each name in its original position, whereas set() would store
        # the names in an arbitrary order, different from one run to another.
        self.prj.fileloaded = "\n".join(dict.fromkeys(import_how.loaded_files))
        self.session.commit()

        # Recompute stats
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        # Let logging do the formatting, only if the record is emitted
        logger.info("Total of %d rows loaded", row_count)

        # Prepare response
        return ImportRealRsp()
Пример #7
0
    def do_real(self) -> None:
        """
            Do the real job, i.e. write everywhere (DB/filesystem).

            Found users, found taxa, column mapping, row count and source path are read back
            from the saved state. The column mapping is written to the project, the source
            bundle is imported, then the loaded files list and the statistics of the project
            are updated. The number of imported rows is set as job result.
        """
        loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
        logger.info("Previously loaded files: %s", loaded_files)

        found_users, taxo_found, col_mapping_dict, \
        nb_rows, source_path = self._load_vars_from_state(self.STATE_KEYS)

        # Save mappings straight away
        col_mapping = ProjectMapping().load_from_dict(col_mapping_dict)
        col_mapping.write_to_project(self.prj)
        self.session.commit()

        # TODO: Duplicated code
        source_bundle = InBundle(
            source_path, Path(self.temp_for_jobs.data_dir_for(self.job_id)))
        # Configure the import to come, destination
        db_writer = DBWriter(self.session)
        import_where = ImportWhere(
            db_writer, self.vault,
            self.temp_for_jobs.base_dir_for(self.job_id))
        # Configure the import to come, directives
        import_how = ImportHow(self.prj_id, self.req.update_mode, col_mapping,
                               self.req.skip_existing_objects, loaded_files)
        import_how.taxo_mapping = self.req.taxo_mappings
        import_how.found_taxa = taxo_found
        import_how.found_users = found_users
        if self.req.skip_loaded_files:
            import_how.compute_skipped(source_bundle, logger)
        if self.req.skip_existing_objects:
            # If we must skip existing objects then do an inventory of what's in already
            with CodeTimer("run: Existing images for %d: " % self.prj_id,
                           logger):
                import_how.objects_and_images_to_skip = Image.fetch_existing_images(
                    self.session, self.prj_id)
        import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

        # Do the bulk job of import, the expected row count is the one read from state
        row_count = source_bundle.do_import(import_where, import_how,
                                            nb_rows,
                                            self.report_progress)

        # Update loaded files in DB, removing duplicates. dict.fromkeys() keeps the first
        # occurrence of each name in its original position, whereas set() would store
        # the names in an arbitrary order, different from one run to another.
        self.prj.fileloaded = "\n".join(dict.fromkeys(import_how.loaded_files))
        self.session.commit()

        # Recompute stats
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        msg = "Total of %d rows loaded" % row_count
        logger.info(msg)
        self.set_job_result(errors=[], infos={"rowcount": row_count})
Пример #8
0
    def do_import(self):
        """
            Do the real job, i.e. copy files while creating records.

            The image files of the source bundle are listed, a TSV is generated for them
            from the request values, then the bundle is imported like a regular one.
            :return: A SimpleImportRsp with an empty error list and the value
                     returned by the bundle import.
        """
        self.manage_uploaded()
        self.unzip_if_needed()
        # Work on a Bundle
        work_dir = Path(self.temp_for_task.data_dir_for(self.task_id))
        bundle = InBundle(self.source_dir_or_zip, work_dir)
        # Clean it, in case the ZIP contains a CSV
        bundle.remove_all_tsvs()
        image_files = bundle.list_image_files()
        # Destination of the import to come
        where = ImportWhere(DBWriter(self.session),
                            self.vault,
                            self.temp_for_task.base_dir_for(self.task_id))
        # Directives of the import to come
        how = ImportHow(prj_id=self.prj_id,
                        update_mode="",
                        custom_mapping=ProjectMapping(),
                        skip_object_duplicates=False,
                        loaded_files=[])
        how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

        # Adjust the request values, in place, before generating the TSV
        values = self.req.values
        user_id = values.get(SimpleImportFields.userlb, "")
        if user_id:
            # The value is replaced by the "user" key, under which the id is registered
            how.found_users["user"] = {"id": user_id}
            values[SimpleImportFields.userlb] = "user"
        status = values.get(SimpleImportFields.status, "")
        if status:
            # Translate via classif_qual, with "" for an unknown status
            values[SimpleImportFields.status] = classif_qual.get(status, "")
        self.make_tsv(bundle, image_files)

        # Import, the expected row count being the number of image files
        nb_images = bundle.do_import(where, how, len(image_files), self.report_progress)
        self.session.commit()

        # Recompute stats and so on
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        return SimpleImportRsp(errors=[], nb_images=nb_images)
Пример #9
0
    def validate_import(self, how: ImportHow, diag: ImportDiagnostic, session: Session, report_def: Callable) -> int:
        """
            Validate the full bundle, i.e. every contained file.

            Existing images of the project are stored into `how.objects_and_images_to_skip`
            before the files are validated. Problems are reported into `diag`: an error when
            no row at all was found, warnings for project fields not seen and for objects
            without GPS information.
            :param how: Import directives.
            :param diag: Collector of validation errors and warnings.
            :param session: DB session, for reading existing images and checking classifications.
            :param report_def: A def for reporting progress, handed over to `validate_each_file`.
            :return: The total row count, as returned by `validate_each_file`.
        """
        with CodeTimer("validate_import: Existing images for %d: " % how.prj_id, logger):
            how.objects_and_images_to_skip = Image.fetch_existing_images(session, how.prj_id)

        total_row_count = self.validate_each_file(how, diag, report_def)

        if total_row_count == 0:
            # Nothing to import, list the plausible reasons in the message
            found_count = len(self.possible_files)
            skipped_count = len(diag.skipped_files)
            reasons = ["No object to import."]
            if found_count != 0:
                validated_count = found_count - skipped_count
                if skipped_count > 0:
                    reasons.append(
                        "* 'SKIP TSV' option was set and all TSV files were imported before."
                        if validated_count == 0 else
                        "* 'SKIP TSV' option was set and new TSV file(s) are not compliant.")
                if validated_count > 0:
                    reasons.append("*  TSV file(s) might be empty.")
                if how.skip_object_duplicates:
                    reasons.append("*  'SKIP OBJECTS' option was set and all objects might be in already.")
            else:
                reasons.append("* No .txt or .tsv file was found, of which name starts with 'ecotaxa'.")
            diag.error("<br>".join(reasons))

        seen_classif_ids = diag.classif_id_seen
        if len(seen_classif_ids) > 0:
            self.check_classif(session, diag, seen_classif_ids)

        logger.info("Taxo Found = %s", how.taxo_found)
        logger.info("Users Found = %s", how.found_users)

        # Fields known in the project mapping but absent from the validated files
        unseen_fields = how.custom_mapping.all_fields.keys() - diag.cols_seen
        logger.info("For Information, not seen fields %s", unseen_fields)
        if len(unseen_fields) > 0:
            unseen_list = ", ".join(unseen_fields)
            diag.warn("Some fields configured in the project are not seen in this import {0} ".format(unseen_list))

        without_gps = diag.nb_objects_without_gps
        if without_gps > 0:
            diag.warn("{0} object(s) don't have GPS information.".format(without_gps))

        return total_row_count
Пример #10
0
 def after_import(how: ImportHow):
     """
         Clear the vignette maker reference held by the import directives.
         :param how: Import directives, its `vignette_maker` attribute is set to None.
     """
     # NOTE(review): indented like a method but takes no `self`; presumably decorated
     # with @staticmethod outside of this excerpt -- to be confirmed.
     how.vignette_maker = None