示例#1
0
    def __init__(self, name, parent_app, data_set,
                 descriptor_type, indexer_type,
                 url_prefix=None):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        Besides storing the given components, construction registers the file
        uploader sub-module and defines every route of this module as a
        closure over ``self``.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: Data set to work over
        :type data_set: SMQTK.data_rep.DataSet

        :param descriptor_type: Feature Descriptor type string
        :type descriptor_type: str

        :param indexer_type: indexer type string
        :type indexer_type: str

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IQRSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        # Make sure that the configured descriptor/indexer types exist, as
        # well as their system configuration sections.
        if descriptor_type not in ContentDescriptorConfiguration.available_labels():
            raise ValueError("'%s' not a valid descriptor type" % descriptor_type)
        if indexer_type not in IndexerConfiguration.available_labels():
            raise ValueError("'%s' not a valid indexer type" % indexer_type)

        self._parent_app = parent_app
        self._data_set = data_set
        self._fd_type_str = descriptor_type
        self._idxr_type_str = indexer_type

        # UIDs flagged as explicit. Every access goes through the lock below.
        self._explicit_uids = set()
        self._explicit_uids_lock = multiprocessing.RLock()
        # TODO: Read in dict from save file

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database. Create web-specific
        #       IqrSession class that stores/gets its state directly from
        #       database.
        self._iqr_controller = IqrController()

        # structures for session ingest progress
        # Two levels: SID -> FID
        self._ingest_progress_locks = {}
        self._ingest_progress = {}

        # Preview Image Caching
        # TODO: Initialize this into static directory that is being served.
        self._preview_cache = PreviewCache(osp.join(self.work_dir, "Previews"))

        # Directory to write data for static viewing
        self._static_data_dir = os.path.join(self.static_folder, 'tmp_data')
        # Cache mapping of written static files for data elements
        self._static_cache = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the module's main page.
            """
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session
            """
            with self.get_current_iqr_session() as iqrs:
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": iqrs.uuid,
                    "positive_uids": tuple(iqrs.positive_ids),
                    "negative_uids": tuple(iqrs.negative_ids),
                    "extension_ingest_contents": {
                        uid: str(df)
                        for uid, df in iqrs.extension_ds.iteritems()
                    },
                    "FeatureMemory": {
                    }
                })

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            with self.get_current_iqr_session():
                return flask.jsonify({
                    "success": True
                })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            The uploaded file is moved into the session's work directory,
            added to the session's extension data set, described, added to
            the session's indexer model and finally adjudicated as a positive
            example.

            :return: status message
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # This route is registered for POST only, so no method dispatch is
            # needed here.
            iqr_sess = self.get_current_iqr_session()
            fid = flask.request.form['fid']

            self.log.debug("[%s::%s] Getting temporary filepath from "
                           "uploader module", iqr_sess.uuid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            # Extend session ingest -- modifying
            with iqr_sess:
                self.log.debug("[%s::%s] Adding new file to extension "
                               "ingest", iqr_sess.uuid, fid)
                sess_upload = osp.join(iqr_sess.work_dir,
                                       osp.basename(upload_filepath))
                os.rename(upload_filepath, sess_upload)
                upload_data = DataFileElement(sess_upload)
                iqr_sess.extension_ds.add_data(upload_data)

            # Compute feature for data -- non-modifying, so done outside of
            # the session context.
            self.log.debug("[%s::%s] Computing feature for file",
                           iqr_sess.uuid, fid)
            feat = iqr_sess.descriptor.compute_descriptor(upload_data)

            # Extend indexer model with feature data -- modifying
            with iqr_sess:
                self.log.debug("[%s::%s] Extending indexer model with "
                               "feature", iqr_sess.uuid, fid)
                iqr_sess.indexer.extend_model({upload_data.uuid(): feat})

                # of course, add the new data element as a positive
                iqr_sess.adjudicate((upload_data.uuid(),))

            return "Finished Ingestion"

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudication for this session

            The ``add_pos``, ``remove_pos``, ``add_neg`` and ``remove_neg``
            request values are each parsed as JSON and default to an empty
            list when absent.

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            if flask.request.method == "POST":
                fetch = flask.request.form
            elif flask.request.method == "GET":
                fetch = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'"
                                   % flask.request.method)

            pos_to_add = json.loads(fetch.get('add_pos', '[]'))
            pos_to_remove = json.loads(fetch.get('remove_pos', '[]'))
            neg_to_add = json.loads(fetch.get('add_neg', '[]'))
            neg_to_remove = json.loads(fetch.get('remove_neg', '[]'))

            # Built once: the same text is logged and returned to the client.
            message = ("Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                       % (pos_to_add, pos_to_remove,
                          neg_to_add, neg_to_remove))
            self.log.debug(message)

            with self.get_current_iqr_session() as iqrs:
                iqrs.adjudicate(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
            return flask.jsonify({
                "success": True,
                "message": message
            })

        @self.route("/get_item_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_adjudication():
            """
            Get the adjudication status of a particular result by ingest ID.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            ingest_uid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "is_pos": ingest_uid in iqrs.positive_ids,
                    "is_neg": ingest_uid in iqrs.negative_ids
                })

        @self.route("/get_positive_uids", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_positive_uids():
            """
            Get a list of the positive ingest UIDs

            :return: {
                    uids: list of <int>
                }
            """
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "uids": list(iqrs.positive_ids)
                })

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return to the client a list of all known dataset IDs but in a random
            order. If there is currently an active IQR session with elements in
            its extension ingest, then those IDs are included in the random
            list.

            :return: {
                    uids: list of int
                }
            """
            # Copy into a fresh set so that adding the session's IDs can never
            # modify a collection owned by the data set itself.
            all_ids = set(self._data_set.uuids())
            with self.get_current_iqr_session() as iqrs:
                all_ids.update(iqrs.extension_ds.uuids())
            all_ids = list(all_ids)
            random.shuffle(all_ids)
            return flask.jsonify({
                "uids": all_ids
            })

        @self.route("/get_ingest_image_preview_data", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return the base64 preview image data for the data file associated
            with the give UID.

            The element is looked up in the base data set first and then in
            the current session's extension data set.

            :return: JSON object with the keys ``success``, ``message``,
                ``is_explicit``, ``shape``, ``data``, ``ext`` and
                ``static_file_link``.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "is_explicit": None,
                "shape": None,  # (width, height)
                "data": None,
                "ext": None,
                "static_file_link": None,
            }

            #: :type: smqtk.data_rep.DataElement
            de = None
            if self._data_set.has_uuid(uid):
                de = self._data_set.get_data(uid)
                with self._explicit_uids_lock:
                    info["is_explicit"] = uid in self._explicit_uids
            else:
                with self.get_current_iqr_session() as iqrs:
                    if iqrs.extension_ds.has_uuid(uid):
                        de = iqrs.extension_ds.get_data(uid)
                        with self._explicit_uids_lock:
                            info["is_explicit"] = uid in self._explicit_uids

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # TODO: Have data-file return an HTML chunk for implementation
                #       defined visualization?
                img_path = self._preview_cache.get_preview_image(de)
                # Open the file ourselves so the handle is reliably closed;
                # PIL reads the image size from the given file object, after
                # which the same handle is rewound to read the raw bytes.
                with open(img_path, 'rb') as img_file:
                    info["shape"] = PIL.Image.open(img_file).size
                    img_file.seek(0)
                    # NOTE(review): ``encodestring`` is a Python 2 era API
                    #   (removed from the standard library in Python 3.9).
                    info["data"] = base64.encodestring(img_file.read())
                info["ext"] = osp.splitext(img_path)[1].lstrip('.')

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                info['static_file_link'] = 'static/' \
                    + os.path.relpath(self._static_cache[de.uuid()],
                                      self.static_folder)

            return flask.jsonify(info)

        @self.route("/mark_uid_explicit", methods=["POST"])
        @self._parent_app.module_login.login_required
        def mark_uid_explicit():
            """
            Mark a given UID as explicit.

            The UID is recorded as given; it is not checked against any data
            set here.

            :return: Always a success value of True.
            :rtype: {
                "success": bool
            }
            """
            uid = flask.request.form['uid']
            with self._explicit_uids_lock:
                self._explicit_uids.add(uid)
            # TODO: Save out dict

            return flask.jsonify({'success': True})

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Classify current IQR session indexer, updating ranking for
            display.

            Fails gracefully if there are no positive[/negative] adjudications.

            Expected Args (form values, each a JSON list, default empty):
                add_pos, remove_pos, add_neg, remove_neg

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            pos_to_add = json.loads(flask.request.form.get('add_pos', '[]'))
            pos_to_remove = json.loads(flask.request.form.get('remove_pos', '[]'))
            neg_to_add = json.loads(flask.request.form.get('add_neg', '[]'))
            neg_to_remove = json.loads(flask.request.form.get('remove_neg', '[]'))

            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.refine(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed refinement"
                    })
                except Exception as ex:
                    # Deliberately broad: any refinement failure is reported
                    # back to the client instead of producing a server error.
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: %s: %s" % (type(ex).__name__, ex)
                    })
示例#2
0
class IqrSearch (SmqtkObject, flask.Flask, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """

    # TODO: User access white/black-list? See ``search_app/__init__.py``:L135

    @classmethod
    def get_default_config(cls):
        """
        Generate the default configuration dictionary for this class.

        Starts from the parent implementation's default configuration, drops
        the ``parent_app`` entry, resets ``iqr_service_url`` to ``None`` and
        fills the ``data_set`` slot with a plugin configuration built from
        the available data set implementations.

        :return: Default configuration dictionary.
        :rtype: dict

        """
        default_config = super(IqrSearch, cls).get_default_config()

        # The parent application is given explicitly to ``from_config``
        # rather than through the configuration.
        del default_config['parent_app']

        default_config['iqr_service_url'] = None

        # Nested plugin configuration block for the data set slot.
        data_set_impls = get_data_set_impls()
        default_config['data_set'] = plugin.make_config(data_set_impls)

        return default_config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Build an instance of this class from a JSON-compliant configuration
        dictionary.

        The given configuration is laid over the class defaults, and the
        ``data_set`` entry is turned from a plugin configuration into an
        instance before the constructor is called.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        # Defaults first, then the caller's values on top (top-level keys
        # only).
        init_kwargs = cls.get_default_config()
        init_kwargs.update(config)

        # Swap the nested plugin configuration for the constructed object.
        data_set_config = init_kwargs['data_set']
        init_kwargs['data_set'] = plugin.from_plugin_config(
            data_set_config, get_data_set_impls()
        )

        return cls(parent_app, **init_kwargs)

    def __init__(self, parent_app, iqr_service_url, data_set,
                 working_directory):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.IqrSearchDispatcher

        :param iqr_service_url: Base URL to the IQR service to use for this
            application interface. Any trailing slashes will be striped.
        :type iqr_service_url: str

        :param data_set: DataSet of the content described by indexed descriptors
            in the linked IQR service.
        :type data_set: smqtk.representation.DataSet

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app.
        :type working_directory: str

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IqrSearch, self).__init__(
            import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._iqr_service = IqrServiceProxy(iqr_service_url.rstrip('/'))

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)
        self.register_blueprint(parent_app.module_login)

        # Mapping of session IDs to their work directory
        #: :type: dict[str, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[str, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the main IQR search page.
            """
            # Leading '/' characters are stripped from the uploader
            # sub-module URLs so the paths stay relative to our base.
            uploader_url = self.mod_upload.url_prefix.lstrip('/')
            uploader_post_url = self.mod_upload.upload_post_url().lstrip('/')
            self._log.debug("Uploader URL: %s", uploader_url)
            # noinspection PyUnresolvedReferences
            return flask.render_template(
                "iqr_search_index.html",
                module_name=self.name,
                uploader_url=uploader_url,
                uploader_post_url=uploader_post_url
            )

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session.

            Relays the JSON returned by the IQR service's ``session``
            endpoint for the current session ID.
            """
            session_id = self.get_current_iqr_session()
            service_response = self._iqr_service.get('session',
                                                     sid=session_id)
            # Surface service-side failures as an exception.
            service_response.raise_for_status()
            session_info = service_response.json()
            return flask.jsonify(session_info)

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Get IQR session state information composed of positive and negative
            descriptor vectors.

            We append to the state received from the service in order to produce
            a state byte package that is compatible with the
            ``IqrSession.set_state_bytes`` method. This way state bytes received
            from this function can be directly consumed by the IQR service or
            other IqrSession instances.

            :return: File response carrying the ZIP state package, named
                ``<session id>.IqrState``.

            """
            sid = self.get_current_iqr_session()

            # Get the state base64 from the underlying service. The response
            # is closed whether or not the request succeeded.
            r_get = self._iqr_service.get('state', sid=sid)
            try:
                r_get.raise_for_status()
                state_b64 = r_get.json()['state_b64']
            finally:
                r_get.close()

            # Load state dictionary from the base-64 ZIP payload:
            # - decode the base64 text into the raw ZIP bytes,
            # - read the state JSON member out of an in-memory ZipFile.
            state_bytes = base64.b64decode(state_b64)
            with zipfile.ZipFile(BytesIO(state_bytes), 'r',
                                 IqrSession.STATE_ZIP_COMPRESSION) as z_state:
                with z_state.open(IqrSession.STATE_ZIP_FILENAME) as f:
                    state_dict = json.load(f)

            # Wrap service state with our UI state: uploaded data elements.
            # Data elements are stored as a dictionary mapping UUID to MIMETYPE
            # and data byte string.
            working_data = {}
            sid_data_elems = self._iqr_example_data.get(sid, {})
            for uid, workingElem in six.iteritems(sid_data_elems):
                working_data[uid] = {
                    'content_type': workingElem.content_type(),
                    # ``b64encode`` yields bytes on Python 3, which
                    # ``json.dumps`` refuses; base64 output is pure ASCII, so
                    # decode it to text.
                    'bytes_base64':
                        base64.b64encode(workingElem.get_bytes())
                              .decode('ascii'),
                }

            state_dict["working_data"] = working_data
            state_json = json.dumps(state_dict)

            z_wrapper_buffer = BytesIO()
            with zipfile.ZipFile(z_wrapper_buffer, 'w',
                                 IqrSession.STATE_ZIP_COMPRESSION) as z_wrapper:
                z_wrapper.writestr(IqrSession.STATE_ZIP_FILENAME, state_json)

            # Rewind so that the response streams the buffer from its start.
            z_wrapper_buffer.seek(0)
            return flask.send_file(
                z_wrapper_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % sid
            )

        @self.route('/set_iqr_state', methods=['PUT'])
        @self._parent_app.module_login.login_required
        def set_iqr_session_state():
            """
            Set the current state based on the given state file.

            The ``fid`` form value identifies a previously uploaded state ZIP
            package. Its ``working_data`` section, when present, is written
            out as this session's example data; the remainder is re-packaged
            and sent to the IQR service.

            Return JSON:
                success
                    True when the state was applied, False when no file ID
                    was provided.
                message
                    Only present on failure: description of the problem.

            """
            sid = self.get_current_iqr_session()
            fid = flask.request.form.get('fid', None)

            return_obj = {
                'success': False,
            }

            #
            # Load in state zip package, prepare zip package for service
            #

            if fid is None:
                # Nothing to load: report it instead of carrying on with a
                # missing file ID.
                return_obj['message'] = 'No file ID provided.'
                return flask.jsonify(return_obj)

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", sid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            # Load ZIP package back in, then remove the uploaded file. The
            # context managers make sure the archive is closed even when the
            # expected member is missing or is not valid JSON.
            try:
                with zipfile.ZipFile(
                        upload_filepath,
                        compression=IqrSession.STATE_ZIP_COMPRESSION) as z:
                    with z.open(IqrSession.STATE_ZIP_FILENAME) as f:
                        state_dict = json.load(f)
            finally:
                os.remove(upload_filepath)

            #
            # Consume working data UUID/bytes
            #
            # Reset this server's resources for an SID
            self.reset_session_local(sid)
            # - Dictionary of data UUID (SHA1) to {'content_type': <str>,
            #   'bytes_base64': <str>} dictionary.
            # - A state package without this section is treated as having no
            #   working data.
            #: :type: dict[str, dict]
            working_data = state_dict.pop('working_data', {})
            # - Write out base64-decoded files to session-specific work
            #   directory.
            # - Update self._iqr_example_data with DataFileElement instances
            #   referencing the just-written files.
            for uuid_sha1, data_info in working_data.items():
                data_mimetype = data_info['content_type']
                data_b64 = str(data_info['bytes_base64'])
                # NOTE(review): assumes ``MT.guess_extension`` may return
                #   None for an unknown type (as the standard ``mimetypes``
                #   function does); fall back to no extension in that case.
                data_ext = MT.guess_extension(data_mimetype) or ''
                # Output file to working directory on disk.
                data_filepath = os.path.join(
                    self._iqr_work_dirs[sid],
                    '%s%s' % (uuid_sha1, data_ext)
                )
                with open(data_filepath, 'wb') as f:
                    # The URL-safe decoder also accepts the standard base64
                    # alphabet.
                    f.write(base64.urlsafe_b64decode(data_b64))
                # Create element reference and store it for the current session.
                data_elem = DataFileElement(data_filepath, readonly=True)
                self._iqr_example_data[sid][uuid_sha1] = data_elem

            #
            # Re-package service state as a ZIP payload.
            #
            service_zip_buffer = BytesIO()
            with zipfile.ZipFile(service_zip_buffer, 'w',
                                 IqrSession.STATE_ZIP_COMPRESSION) \
                    as service_zip:
                service_zip.writestr(IqrSession.STATE_ZIP_FILENAME,
                                     json.dumps(state_dict))
            service_zip_base64 = \
                base64.b64encode(service_zip_buffer.getvalue())

            # Update service state, failing loudly if the service rejects it
            # (same handling as the other service calls in this module).
            # NOTE(review): assumes ``put`` returns the same kind of response
            #   object as ``get``/``post`` -- confirm against IqrServiceProxy.
            r_put = self._iqr_service.put('state',
                                          sid=sid,
                                          state_base64=service_zip_base64)
            r_put.raise_for_status()

            return_obj['success'] = True
            return flask.jsonify(return_obj)

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Make sure an IQR session exists for the current web session.

            Return JSON:
                success
                    Always True if the message returns.

            """
            # Fetching the current session has the side effect of creating
            # one when needed; the returned ID itself is not used here.
            self.get_current_iqr_session()
            response_body = {"success": True}
            return flask.jsonify(response_body)

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return static links to the data file associated with the given
            UID and to its preview image, plus the preview image's shape.

            The element is looked up in the indexed data set first and then
            in the current session's example data.

            :return: JSON object with the keys ``success``, ``message``,
                ``shape``, ``static_file_link`` and ``static_preview_link``.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                sid = self.get_current_iqr_session()
                # A session without any recorded example data simply has no
                # match, rather than being an error.
                #: :type: smqtk.representation.DataElement | None
                de = self._iqr_example_data.get(sid, {}).get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID '%s' not part of the base or working " \
                                  "data set!" % uid
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                # Open the file ourselves so the handle is reliably closed;
                # PIL reads the image size from the given file object.
                with open(preview_path, 'rb') as preview_file:
                    info["shape"] = PIL.Image.open(preview_file).size

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de.uuid()] = de

                # Need to format links by transforming the generated paths to
                # something usable by webpage:
                # - make relative to the static directory, and then prepend
                #   the known static URL prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de.uuid()],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest an uploaded file, identified by the ``fid`` form value, as
            an external positive example of the current IQR session.

            The file is moved from the uploader into the session's work
            directory, recorded as example data for the session and sent to
            the IQR service.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            file_id = flask.request.form['fid']
            session_id = self.get_current_iqr_session()

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", session_id, file_id)
            uploaded_path = self.mod_upload.get_path_for_id(file_id)
            self.mod_upload.clear_completed(file_id)

            # Relocate the upload into this session's work directory, keeping
            # its file name.
            self._log.debug("[%s::%s] Moving uploaded file",
                            session_id, file_id)
            session_path = osp.join(self._iqr_work_dirs[session_id],
                                    osp.basename(uploaded_path))
            os.rename(uploaded_path, session_path)

            # Remember the element as user example data of this session.
            example_elem = DataFileElement(session_path)
            example_uuid = example_elem.uuid()
            self._iqr_example_data[session_id][example_uuid] = example_elem

            # Hand the content to the service as an external positive.
            self._log.debug("[%s::%s] Adding new data to session "
                            "external positives", session_id, file_id)
            content_b64 = base64.b64encode(example_elem.get_bytes())
            content_type = example_elem.content_type()
            service_response = self._iqr_service.post(
                'add_external_pos', sid=session_id,
                base64=content_b64, content_type=content_type
            )
            service_response.raise_for_status()

            return str(example_uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            (Re)initialize the working index of the current IQR session by
            posting to the service's ``initialize`` endpoint, relaying the
            service's JSON reply to the client.
            """
            session_id = self.get_current_iqr_session()

            response = self._iqr_service.post('initialize', sid=session_id)
            response.raise_for_status()
            service_reply = response.json()

            return flask.jsonify(service_reply)

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Report whether an element of the example set is currently marked
            positive and/or negative, as answered by the IQR service's
            ``adjudicate`` endpoint for the ``uid`` query argument.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            # TODO: Collapse example and index adjudication endpoints.
            uid = flask.request.args['uid']
            session_id = self.get_current_iqr_session()

            response = self._iqr_service.get('adjudicate',
                                             sid=session_id, uid=uid)
            response.raise_for_status()
            status = response.json()

            # Only relay the two adjudication flags from the service reply.
            return flask.jsonify(
                dict((flag, status[flag]) for flag in ("is_pos", "is_neg"))
            )

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Look up the adjudication status of one data/descriptor element,
            identified by the ``uid`` query argument, through the IQR
            service's ``adjudicate`` endpoint.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            # TODO: Collapse example and index adjudication endpoints.
            uid = flask.request.args['uid']
            session_id = self.get_current_iqr_session()

            response = self._iqr_service.get('adjudicate',
                                             sid=session_id, uid=uid)
            response.raise_for_status()
            status = response.json()

            reply = {}
            for flag in ("is_pos", "is_neg"):
                reply[flag] = status[flag]
            return flask.jsonify(reply)

        @self.route("/adjudicate", methods=["POST"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Apply adjudication changes to the current session.

            The form fields ``add_pos``, ``remove_pos``, ``add_neg`` and
            ``remove_neg`` are each parsed as JSON (a missing field counts as
            an empty list). Additions are forwarded to the IQR service as
            positives/negatives, and every removed UUID, whichever list it
            came from, is forwarded as neutral.

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            def form_list(field):
                # Absent form fields behave like an empty JSON list.
                return json.loads(flask.request.form.get(field, '[]'))

            pos_to_add = form_list('add_pos')
            pos_to_remove = form_list('remove_pos')
            neg_to_add = form_list('add_neg')
            neg_to_remove = form_list('remove_neg')

            summary = "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} " % (
                pos_to_add, pos_to_remove, neg_to_add, neg_to_remove,
            )
            self._log.debug(summary)

            session_id = self.get_current_iqr_session()

            # Anything removed from either label set goes back to neutral.
            to_neutral = list(set(pos_to_remove) | set(neg_to_remove))

            response = self._iqr_service.post(
                'adjudicate',
                sid=session_id,
                pos=json.dumps(pos_to_add),
                neg=json.dumps(neg_to_add),
                neutral=json.dumps(to_neutral),
            )
            response.raise_for_status()

            return flask.jsonify({"success": True, "message": summary})

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Ask the IQR service to refine the current session (its ``refine``
            endpoint), updating the ranking used for display.

            Fails gracefully if there are no positive[/negative] adjudications.

            """
            session_id = self.get_current_iqr_session()

            response = self._iqr_service.post('refine', sid=session_id)
            response.raise_for_status()

            outcome = {
                "success": True,
                "message": "Completed refinement",
            }
            return flask.jsonify(outcome)

        @self.route("/iqr_ordered_results", methods=['GET'])
        @self._parent_app.module_login.login_required
        def get_ordered_results():
            """
            Fetch ordered (UID, probability) pairs for the index range [i, j)
            from the IQR service's ``get_results`` endpoint. If j is beyond
            the end of available results, only available results are returned.

            The ``i`` and ``j`` query arguments are optional; each one is
            forwarded to the service as an integer only when it was given.

            This may be empty if no refinement has yet occurred.

            Return format:
            {
                results: [ (uid, probability), ... ]
            }
            """
            lower = flask.request.args.get('i', None)
            upper = flask.request.args.get('j', None)

            query = {'sid': self.get_current_iqr_session()}
            # Only forward the bounds the client actually supplied.
            for key, raw in (('i', lower), ('j', upper)):
                if raw is not None:
                    query[key] = int(raw)

            response = self._iqr_service.get('get_results', **query)
            response.raise_for_status()
            return flask.jsonify(response.json())

        @self.route("/reset_iqr_session", methods=["POST"])
        @self._parent_app.module_login.login_required
        def reset_iqr_session():
            """
            Reset the current IQR session, first on the IQR service and then
            in this server's local per-session resources.
            """
            session_id = self.get_current_iqr_session()

            # Service side: a PUT on ``session`` resets the remote session.
            response = self._iqr_service.put('session', sid=session_id)
            response.raise_for_status()

            # Local side: work directory and example data for this session.
            self.reset_session_local(session_id)

            return flask.jsonify({"success": True})

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Send the client every data/descriptor ID of the configured data
            set (NOT descriptor/NNI set), in random order.

            Thus, we assume that the nearest neighbor index that is searchable
            is from at least this set of data.

            :return: {
                    uids: list[str]
                }
            """
            shuffled_uids = list(self._data_set.uuids())
            # In-place shuffle of our private copy of the UUIDs.
            random.shuffle(shuffled_uids)
            payload = {"uids": shuffled_uids}
            return flask.jsonify(payload)

        @self.route('/is_ready')
        def is_ready():
            """ Liveness probe: always answers ``{"alive": true}``. """
            status = {"alive": True}
            return flask.jsonify(status)

    def __del__(self):
        """
        Remove every per-session work directory tracked by this instance.

        A finalizer can run on a partially constructed object (for example
        when ``__init__`` raised before ``_iqr_work_dirs`` was assigned), so
        the mapping is looked up defensively instead of being assumed to
        exist.
        """
        work_dirs = getattr(self, '_iqr_work_dirs', None)
        if not work_dirs:
            return
        # Iterate over a snapshot so the mapping is never walked while it is
        # being changed elsewhere.
        for wdir in list(work_dirs.values()):
            if os.path.isdir(wdir):
                shutil.rmtree(wdir)

    def get_config(self):
        """
        Build the configuration dictionary describing this instance.

        :return: Dictionary with the IQR service URL, the configured working
            directory and the plugin configuration of the data set.
        :rtype: dict
        """
        config = {}
        config['iqr_service_url'] = self._iqr_service.url
        config['working_directory'] = self._working_dir
        config['data_set'] = plugin.to_plugin_config(self._data_set)
        return config

    @property
    def work_dir(self):
        """
        :return: Common work directory for this instance, as an absolute
            path with a leading ``~`` expanded to the user's home directory.
        :rtype: str
        """
        # Expand ``~`` first: ``abspath`` on an unexpanded "~/x" would prepend
        # the current directory, after which the tilde is no longer leading
        # and ``expanduser`` would leave it untouched.
        return osp.abspath(osp.expanduser(self._working_dir))

    def get_current_iqr_session(self):
        """
        Return the ID of the current IQR session, taken from the flask
        session, creating the session on the IQR service when the service
        does not list it yet.

        Local per-session state (work directory and example-data mapping) is
        (re)initialized whenever the service session was just created or no
        work directory is recorded for the session.

        :rtype: str

        """
        session_id = str(flask.session.sid)

        # Make sure the configured service knows about this session.
        listing = self._iqr_service.get('session_ids')
        listing.raise_for_status()
        newly_created = session_id not in listing.json()['session_uuids']
        if newly_created:
            creation = self._iqr_service.post('session', sid=session_id)
            creation.raise_for_status()

        if newly_created or session_id not in self._iqr_work_dirs:
            # Local dictionaries are not initialized yet for this session.
            session_dir = osp.join(self.work_dir, session_id)
            self._iqr_work_dirs[session_id] = session_dir
            self._iqr_example_data[session_id] = {}

            safe_create_dir(session_dir)

        return session_id

    def reset_session_local(self, sid):
        """
        Clear this server's local state for one session: the session's work
        directory is removed (if present) and recreated, and its example
        data mapping is emptied.

        A given ``sid`` must have been created first. This happens in the
        ``get_current_iqr_session`` method.

        This does not affect the linked IQR service.

        :param sid: Session ID to reset for.
        :type sid: str

        :raises KeyError: ``sid`` not recognized. Probably not initialized
            first.

        """
        session_dir = self._iqr_work_dirs[sid]

        # Start over with an empty work sub-directory.
        if os.path.isdir(session_dir):
            shutil.rmtree(session_dir)
        safe_create_dir(session_dir)

        # Forget any example data uploaded for this session.
        self._iqr_example_data[sid].clear()
示例#3
0
class IqrSearch(SmqtkObject, flask.Blueprint, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """
    @classmethod
    def get_default_config(cls):
        """
        Build the default configuration for this class.

        Starts from the parent implementation's defaults, drops the
        ``parent_app`` slot and replaces the plugin-backed slots with plugin
        configuration dictionaries.

        :return: Default configuration dictionary.
        :rtype: dict
        """
        config = super(IqrSearch, cls).get_default_config()

        # ``parent_app`` is specified explicitly later, not via configuration.
        del config['parent_app']

        # Plugin-backed slots get their multi-implementation configurations.
        config['data_set'] = plugin.make_config(get_data_set_impls())
        config['descr_generator'] = plugin.make_config(
            get_descriptor_generator_impls())
        config['nn_index'] = plugin.make_config(get_nn_index_impls())

        # Relevancy index: plugin defaults, overlaid with any default value
        # already present in the slot.
        rel_index_config = plugin.make_config(get_relevancy_index_impls())
        existing_rel_index = config['rel_index_config']
        if existing_rel_index:
            rel_index_config.update(existing_rel_index)
        config['rel_index_config'] = rel_index_config

        # Descriptor factory: factory defaults, overlaid with the
        # configuration of any default factory instance in the slot.
        factory_config = DescriptorElementFactory.get_default_config()
        existing_factory = config['descriptor_factory']
        if existing_factory:
            factory_config.update(existing_factory.get_config())
        config['descriptor_factory'] = factory_config

        return config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Build an instance from a JSON-compliant configuration dictionary.

        The given configuration is laid over the class defaults, after which
        the plugin-backed entries (``data_set``, ``descr_generator``,
        ``nn_index``) and the ``descriptor_factory`` entry are replaced by
        instances constructed from their configurations.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        settings = cls.get_default_config()
        settings.update(config)

        # Slots holding plugin configurations, paired with the getter for the
        # implementations each may be constructed from.
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, get_impls in plugin_slots:
            settings[slot] = plugin.from_plugin_config(settings[slot],
                                                       get_impls())

        settings['descriptor_factory'] = DescriptorElementFactory.from_config(
            settings['descriptor_factory'])

        return cls(parent_app, **settings)

    def __init__(self,
                 parent_app,
                 name,
                 data_set,
                 descr_generator,
                 nn_index,
                 working_directory,
                 rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_MEMORY_DESCR_FACTORY,
                 url_prefix=None,
                 pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator:
            smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.NearestNeighborsIndex

        :param rel_index_config: Plugin configuration for the RelevancyIndex to
            use.
        :type rel_index_config: dict

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.
        :type working_directory: str

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint. A leading
            slash is added when missing. ``None`` (the default) is handed to
            the blueprint unchanged.
        :type url_prefix: str | None

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        """
        # Make sure a given URL prefix starts with a slash. The default of
        # ``None`` must be skipped here: it has no ``startswith`` and would
        # otherwise raise AttributeError.
        if url_prefix is not None and not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch,
              self).__init__(name,
                             import_name=__name__,
                             static_folder=os.path.join(SCRIPT_DIR, "static"),
                             template_folder=os.path.join(
                                 SCRIPT_DIR, "templates"),
                             url_prefix=url_prefix)

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name,
                                        parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control and resources
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()
        # Mapping of session IDs to their work directory
        #: :type: dict[collections.Hashable, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}
        # Descriptors of example data
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DescriptorElement]]
        self._iqr_example_pos_descr = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(
            osp.join(self._static_data_dir, "previews"))

        # Cache mapping of written static files for data elements
        # (data element UUID -> written file path / data element).
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the ``iqr_search_index.html`` template with this module's
            name, the uploader sub-module URLs and the parent application's
            navigation bar content.
            """
            context = dict(
                module_name=self.name,
                uploader_url=self.mod_upload.url_prefix,
                uploader_post_url=self.mod_upload.upload_post_url(),
            )
            context.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **context)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session: its UUID, the
            names/types of the configured algorithms, the UUIDs of positive,
            negative and example-positive descriptors, and the size of the
            working index.
            """
            with self.get_current_iqr_session() as iqrs:
                summary = {}
                summary["uuid"] = iqrs.uuid
                summary["descriptor_type"] = self._descriptor_generator.name
                summary["nn_index_type"] = self._nn_index.name
                summary["relevancy_index_type"] = \
                    self._rel_index_config['type']
                summary["positive_uids"] = tuple(
                    [d.uuid() for d in iqrs.positive_descriptors])
                summary["negative_uids"] = tuple(
                    [d.uuid() for d in iqrs.negative_descriptors])
                # UUIDs of example positive descriptors
                summary["ex_pos"] = tuple(
                    self._iqr_example_pos_descr[iqrs.uuid])
                # A session counts as initialized once its working index
                # holds at least one entry.
                summary["initialized"] = iqrs.working_index.count() > 0
                summary["index_size"] = iqrs.working_index.count()
                # noinspection PyProtectedMember
                return flask.jsonify(summary)

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Send the current IQR session state as a downloadable file.

            The state is a deflate-compressed zip archive with a single
            member, named after the session UUID, holding a JSON object with
            the de-duplicated positive (``pos``) and negative (``neg``)
            descriptor vectors.
            """
            def as_tuples(descriptors):
                # Vectors become tuples so that they can be put in a set.
                return [tuple(d.vector().tolist()) for d in descriptors]

            with self.get_current_iqr_session() as iqrs:
                iqrs_uuid = str(iqrs.uuid)

                # Positive user examples followed by adjudicated positives.
                pos_vectors = as_tuples(
                    self._iqr_example_pos_descr[iqrs.uuid].values())
                pos_vectors = pos_vectors + as_tuples(
                    iqrs.positive_descriptors)
                pos_elements = list(set(pos_vectors))

                # No negative user example support yet; adjudicated only.
                neg_elements = list(set(as_tuples(iqrs.negative_descriptors)))

            state_json = json.dumps({
                'pos': pos_elements,
                'neg': neg_elements,
            })

            archive_buffer = StringIO()
            with zipfile.ZipFile(archive_buffer, 'w',
                                 zipfile.ZIP_DEFLATED) as archive:
                archive.writestr(iqrs_uuid, state_json)

            # Rewind so that the whole archive is sent.
            archive_buffer.seek(0)

            return flask.send_file(
                archive_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % iqrs_uuid,
            )

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Make sure an IQR session exists for the current web session and
            report success.

            :rtype: {
                    success: bool
                }
            """
            # Merely fetching the current IQR session guarantees that one has
            # been constructed for the current session.
            with self.get_current_iqr_session():
                reply = flask.jsonify({"success": True})
            return reply

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return preview information for the data element with the UUID
            given in the ``uid`` query argument: the preview image's size and
            static links to both the preview image and the element's file.

            The element is looked up in the indexed data set first and in the
            current session's example data otherwise. ``success`` is False,
            with an explanatory ``message``, when neither knows the UUID.
            """
            uid = flask.request.args['uid']

            info = dict(
                success=True,
                message=None,
                shape=None,  # (width, height)
                static_file_link=None,
                static_preview_link=None,
            )

            # Indexed data takes precedence over session example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    #: :type: smqtk.representation.DataElement | None
                    de = self._iqr_example_data[iqrs.uuid].get(uid)

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
                return flask.jsonify(info)

            # The preview path should lie within our statically hosted area.
            preview_path = self._preview_cache.get_preview_image(de)
            info["shape"] = PIL.Image.open(preview_path).size

            # Write the element's content into the static area only once.
            de_uuid = de.uuid()
            if de_uuid not in self._static_cache:
                self._static_cache[de_uuid] = \
                    de.write_temp(self._static_data_dir)
                self._static_cache_element[de_uuid] = de

            # Links usable by the web page: each path is made relative to the
            # static directory and prefixed with the known static URL.
            def static_link(path):
                return '/'.join((
                    self._static_data_prefix,
                    os.path.relpath(path, self._static_data_dir),
                ))

            info["static_preview_link"] = static_link(preview_path)
            info['static_file_link'] = static_link(self._static_cache[de_uuid])

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            The uploaded file (form field ``fid``) is moved into the session's
            work directory and recorded as example data. A descriptor is then
            computed for it with the configured descriptor generator, stored
            as an example positive descriptor and passed to the session's
            ``adjudicate`` method.

            :return: string of data/descriptor element's UUID, or an
                ``("Input Error: ...", 400)`` response when descriptor
                generation raises ``ValueError``.
            :rtype: str | (str, int)

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # No request-method check is needed: this route only admits POST.
            with self.get_current_iqr_session() as iqrs:
                fid = flask.request.form['fid']

                self._log.debug(
                    "[%s::%s] Getting temporary filepath from "
                    "uploader module", iqrs.uuid, fid)
                upload_filepath = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                self._log.debug("[%s::%s] Moving uploaded file", iqrs.uuid,
                                fid)
                sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                       osp.basename(upload_filepath))
                os.rename(upload_filepath, sess_upload)

                # Record the uploaded data as example data for this session.
                upload_data = DataFileElement(sess_upload)
                data_uuid = upload_data.uuid()
                self._iqr_example_data[iqrs.uuid][data_uuid] = upload_data

                # Extend session ingest -- modifying
                self._log.debug(
                    "[%s::%s] Adding new data to session "
                    "positives", iqrs.uuid, fid)
                try:
                    upload_descr = \
                        self._descriptor_generator.compute_descriptor(
                            upload_data, self._descr_elem_factory
                        )
                except ValueError as ex:
                    # Report the failure to the client as a bad request.
                    return "Input Error: %s" % str(ex), 400

                self._iqr_example_pos_descr[iqrs.uuid][data_uuid] = \
                    upload_descr
                iqrs.adjudicate((upload_descr, ))

                return str(data_uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.

            :return: JSON object with ``success`` (bool) and ``message``
                (str). Any exception raised while updating the working index
                is reported through ``success: false`` and a message carrying
                the exception's type name and text.
            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.update_working_index(self._nn_index)
                except Exception as ex:
                    # Request boundary: tell the client what went wrong
                    # instead of failing the request.
                    return flask.jsonify({
                        "success": False,
                        "message":
                            "ERROR: (%s) %s" % (type(ex).__name__, str(ex)),
                    })
                return flask.jsonify({
                    "success": True,
                    "message": "Completed initialization",
                })
示例#4
0
class IqrSearch (SmqtkObject, flask.Blueprint, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """

    @classmethod
    def get_default_config(cls):
        """
        Generate the default configuration dictionary for this class.

        Starts from the parent implementation's defaults, removes the
        ``parent_app`` slot and replaces the plugin-backed slots with their
        plugin configuration blocks.

        :return: Default configuration dictionary.
        :rtype: dict

        """
        config = super(IqrSearch, cls).get_default_config()

        # ``parent_app`` is passed to ``from_config`` explicitly, so it is not
        # part of the configuration.
        del config['parent_app']

        # Slots that are filled purely from a plugin configuration block.
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, get_impls in plugin_slots:
            config[slot] = plugin.make_config(get_impls())

        # Relevancy index: plugin block overlaid with any existing default.
        rel_index_cfg = plugin.make_config(get_relevancy_index_impls())
        if config['rel_index_config']:
            rel_index_cfg.update(config['rel_index_config'])
        config['rel_index_config'] = rel_index_cfg

        # Descriptor factory: factory defaults overlaid with the configuration
        # of any existing default factory instance.
        factory_cfg = DescriptorElementFactory.get_default_config()
        if config['descriptor_factory']:
            factory_cfg.update(config['descriptor_factory'].get_config())
        config['descriptor_factory'] = factory_cfg

        return config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Build an instance of this class from a JSON-compliant configuration
        dictionary.

        The given configuration is laid over the default configuration, and
        the plugin-backed entries are turned into instances before being
        passed to the constructor.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        init_kwargs = cls.get_default_config()
        init_kwargs.update(config)

        # Replace each plugin configuration block with the instance it
        # describes.
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, get_impls in plugin_slots:
            init_kwargs[slot] = plugin.from_plugin_config(init_kwargs[slot],
                                                          get_impls())

        factory_config = init_kwargs['descriptor_factory']
        init_kwargs['descriptor_factory'] = \
            DescriptorElementFactory.from_config(factory_config)

        return cls(parent_app, **init_kwargs)

    def __init__(self, parent_app, name, data_set, descr_generator, nn_index,
                 working_directory, rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_MEMORY_DESCR_FACTORY,
                 url_prefix=None, pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor
        generator and nearest-neighbors index.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param name: Name of this blueprint instance
        :type name: str

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator:
            smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.NearestNeighborsIndex

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.
        :type working_directory: str

        :param rel_index_config: Plugin configuration for the RelevancyIndex to
            use.
        :type rel_index_config: dict

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint. A leading
            slash is added when missing. ``None`` (the default) is passed
            through unchanged.
        :type url_prefix: str | None

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: ``pos_seed_neighbors`` is a value that ``int()``
            cannot convert.

        """
        # Make sure a given URL prefix starts with a slash. The prefix is
        # optional, so ``None`` must not be dereferenced here.
        if url_prefix is not None and not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control and resources
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()
        # Mapping of session IDs to their work directory
        #: :type: dict[collections.Hashable, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}
        # Descriptors of example data (session ID -> uuid -> descriptor)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DescriptorElement]]
        self._iqr_example_pos_descr = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        # (element UUID -> written file path, element UUID -> element)
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the IQR search landing page.

            The template receives this module's name, the uploader module's
            URLs and the parent application's navigation bar content.
            """
            uploader = self.mod_upload
            template_args = dict(
                module_name=self.name,
                uploader_url=uploader.url_prefix,
                uploader_post_url=uploader.upload_post_url(),
            )
            template_args.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html",
                                         **template_args)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session.

            :return: JSON object with the session UUID, the names/types of the
                configured algorithms, the UUIDs of positive/negative
                adjudications and of uploaded positive examples, and the
                working index size.
            """
            with self.get_current_iqr_session() as iqrs:
                info = {"uuid": iqrs.uuid}

                # Algorithm configuration in use.
                info["descriptor_type"] = self._descriptor_generator.name
                info["nn_index_type"] = self._nn_index.name
                info["relevancy_index_type"] = self._rel_index_config['type']

                # Current adjudication state.
                info["positive_uids"] = \
                    tuple(d.uuid() for d in iqrs.positive_descriptors)
                info["negative_uids"] = \
                    tuple(d.uuid() for d in iqrs.negative_descriptors)

                # UUIDs of example positive descriptors
                info["ex_pos"] = tuple(self._iqr_example_pos_descr[iqrs.uuid])

                # The session counts as initialized once its working index
                # holds at least one entry.
                info["initialized"] = iqrs.working_index.count() > 0
                info["index_size"] = iqrs.working_index.count()

                # noinspection PyProtectedMember
                return flask.jsonify(info)

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Download the current IQR session state as a ZIP attachment.

            The archive holds a single member, named after the session UUID,
            containing a JSON object with de-duplicated ``pos`` and ``neg``
            lists of descriptor vectors.
            """
            def vector_tuple(descr):
                # Tuples are hashable, which allows set-based de-duplication.
                return tuple(descr.vector().tolist())

            with self.get_current_iqr_session() as iqrs:
                iqrs_uuid = str(iqrs.uuid)

                # Positive user examples followed by adjudicated positives.
                example_vecs = [
                    vector_tuple(d) for d
                    in self._iqr_example_pos_descr[iqrs.uuid].values()
                ]
                adjudicated_pos = [vector_tuple(d) for d
                                   in iqrs.positive_descriptors]
                pos_elements = list(set(example_vecs + adjudicated_pos))

                # No negative user example support yet, so only adjudicated
                # negatives are included.
                adjudicated_neg = [vector_tuple(d) for d
                                   in iqrs.negative_descriptors]
                neg_elements = list(set(adjudicated_neg))

            z_buffer = StringIO()
            with zipfile.ZipFile(z_buffer, 'w', zipfile.ZIP_DEFLATED) as z:
                z.writestr(iqrs_uuid, json.dumps({
                    'pos': pos_elements,
                    'neg': neg_elements,
                }))

            # Rewind so the response streams the archive from its start.
            z_buffer.seek(0)

            return flask.send_file(
                z_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % iqrs_uuid,
            )

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Acquiring the current IQR session ensures that one has been
            # constructed for the current web session; nothing else is needed
            # from it here.
            with self.get_current_iqr_session():
                response = flask.jsonify({
                    "success": True
                })
                return response

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return preview image information for the data element associated
            with the given UID.

            Expects a ``uid`` request argument.

            :return: JSON object with keys ``success``, ``message``, ``shape``
                (preview image ``(width, height)``), ``static_file_link`` and
                ``static_preview_link``. When the UID is unknown, ``success``
                is False and ``message`` explains why.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    #: :type: smqtk.representation.DataElement | None
                    de = self._iqr_example_data[iqrs.uuid].get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                # Only the dimensions are needed; release the image's file
                # handle right away instead of leaving it open.
                with PIL.Image.open(preview_path) as img:
                    info["shape"] = img.size

                # Write the element's content into the static area once and
                # remember where it went.
                de_uuid = de.uuid()
                if de_uuid not in self._static_cache:
                    self._static_cache[de_uuid] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de_uuid] = de

                # Need to format links by transforming the generated paths to
                # something usable by the webpage: make each path relative to
                # the static directory, then prepend the known static URL
                # prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de_uuid],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            Expects a ``fid`` form value. The uploaded file is moved into the
            session's work directory, described, and adjudicated as a positive
            example for the current session.

            :return: string of data/descriptor element's UUID, or an
                ``(error message, 400)`` pair when descriptor generation
                raises a ValueError.
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                with self.get_current_iqr_session() as iqrs:
                    fid = flask.request.form['fid']

                    self._log.debug("[%s::%s] Getting temporary filepath from "
                                    "uploader module", iqrs.uuid, fid)
                    upload_filepath = self.mod_upload.get_path_for_id(fid)
                    self.mod_upload.clear_completed(fid)

                    self._log.debug("[%s::%s] Moving uploaded file",
                                    iqrs.uuid, fid)
                    sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)
                    upload_data = DataFileElement(sess_upload)
                    data_uuid = upload_data.uuid()
                    self._iqr_example_data[iqrs.uuid][data_uuid] = upload_data

                    # Extend session ingest -- modifying
                    self._log.debug("[%s::%s] Adding new data to session "
                                    "positives", iqrs.uuid, fid)
                    try:
                        upload_descr = \
                            self._descriptor_generator.compute_descriptor(
                                upload_data, self._descr_elem_factory
                            )
                    except ValueError as ex:
                        # ``except ... as`` is valid on Python 2.6+ and
                        # Python 3; the comma form is Python-2-only.
                        return "Input Error: %s" % str(ex), 400

                    # Record the example's descriptor and mark it positive.
                    self._iqr_example_pos_descr[iqrs.uuid][data_uuid] = \
                        upload_descr
                    iqrs.adjudicate((upload_descr,))

                    return str(data_uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.

            :return: JSON response carrying a boolean ``success`` flag and a
                ``message`` string. An exception raised while updating the
                working index is reported through this payload instead of
                propagating.

            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.update_working_index(self._nn_index)
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed initialization",
                    })
                except Exception as ex:
                    # ``except ... as`` is valid on Python 2.6+ and Python 3;
                    # the comma form is a syntax error on Python 3.
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: (%s) %s" % (type(ex).__name__,
                                                       str(ex))
                    })
示例#5
0
class IqrSearch (SmqtkObject, flask.Flask, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """

    # TODO: User access white/black-list? See ``search_app/__init__.py``:L135

    @classmethod
    def get_default_config(cls):
        """
        Generate the default configuration dictionary for this class.

        Starts from the parent implementation's defaults, removes the
        ``parent_app`` slot, leaves ``iqr_service_url`` unset and fills the
        ``data_set`` slot with its plugin configuration block.

        :return: Default configuration dictionary.
        :rtype: dict

        """
        config = super(IqrSearch, cls).get_default_config()

        # ``parent_app`` is passed to ``from_config`` explicitly, so it is not
        # part of the configuration.
        config.pop('parent_app')

        config['iqr_service_url'] = None

        # Plugin-backed slot.
        data_set_impls = DataSet.get_impls()
        config['data_set'] = make_default_config(data_set_impls)

        return config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Build an instance of this class from a JSON-compliant configuration
        dictionary.

        The given configuration is laid over the default configuration and
        the ``data_set`` entry is turned into an instance before everything
        is passed to the constructor.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        init_kwargs = cls.get_default_config()
        init_kwargs.update(config)

        # Replace the plugin configuration block with the instance it
        # describes.
        data_set_config = init_kwargs['data_set']
        init_kwargs['data_set'] = \
            from_config_dict(data_set_config, DataSet.get_impls())

        return cls(parent_app, **init_kwargs)

    def __init__(self, parent_app, iqr_service_url, data_set,
                 working_directory):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.IqrSearchDispatcher

        :param iqr_service_url: Base URL to the IQR service to use for this
            application interface. Any trailing slashes will be striped.
        :type iqr_service_url: str

        :param data_set: DataSet of the content described by indexed descriptors
            in the linked IQR service.
        :type data_set: smqtk.representation.DataSet

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app.
        :type working_directory: str

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IqrSearch, self).__init__(
            import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._iqr_service = ServiceProxy(iqr_service_url.rstrip('/'))

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)
        self.register_blueprint(parent_app.module_login)

        # Mapping of session IDs to their work directory
        #: :type: dict[str, str]
        self._iqr_work_dirs: Dict[str, str] = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        self._iqr_example_data: Dict[
            str,
            Dict[Hashable, DataElement]
        ] = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache: Dict[Hashable, str] = {}
        self._static_cache_element: Dict[Hashable, DataElement] = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the IQR search landing page.

            Uploader URLs are handed to the template with their leading
            slashes removed so that they are relative to this app's base.
            """
            uploader = self.mod_upload
            assert uploader.url_prefix is not None, (
                "Currently assuming the upload module has a non-None URL "
                "prefix."
            )
            uploader_url = uploader.url_prefix.lstrip('/')
            uploader_post_url = uploader.upload_post_url().lstrip('/')
            template_args = dict(
                module_name=self.name,
                uploader_url=uploader_url,
                uploader_post_url=uploader_post_url,
            )
            self._log.debug("Uploader URL: %s", template_args['uploader_url'])
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html",
                                         **template_args)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session.

            Relays the JSON body of the IQR service's ``session`` endpoint for
            the current session ID.
            """
            session_id = self.get_current_iqr_session()
            service_response = self._iqr_service.get('session',
                                                     sid=session_id)
            # Surface service-side HTTP errors instead of relaying them as a
            # normal payload.
            service_response.raise_for_status()
            session_info = service_response.json()
            return flask.jsonify(session_info)

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Get IQR session state information composed of positive and negative
            descriptor vectors.

            We append to the state received from the service in order to produce
            a state byte package that is compatible with the
            ``IqrSession.set_state_bytes`` method. This way state bytes received
            from this function can be directly consumed by the IQR service or
            other IqrSession instances.

            :return: ZIP attachment named ``<sid>.IqrState`` holding the
                service state JSON extended with a ``working_data`` entry for
                the data uploaded in this session.

            """
            sid = self.get_current_iqr_session()

            # Get the state base64 from the underlying service, releasing the
            # response whether or not the request succeeded.
            r_get = self._iqr_service.get('state', sid=sid)
            try:
                r_get.raise_for_status()
                state_b64 = r_get.json()['state_b64']
            finally:
                r_get.close()
            state_bytes = base64.b64decode(state_b64)

            # Load the state dictionary from the decoded ZIP payload, closing
            # both the archive and its member once read.
            with zipfile.ZipFile(BytesIO(state_bytes), 'r',
                                 IqrSession.STATE_ZIP_COMPRESSION) as z_state:
                with z_state.open(IqrSession.STATE_ZIP_FILENAME) as f:
                    state_dict = json.load(f)

            # Wrap service state with our UI state: uploaded data elements.
            # Data elements are stored as a dictionary mapping UUID to MIMETYPE
            # and data byte string.
            # - The base64 bytes are decoded as ASCII so the value is a JSON
            #   serializable string.
            sid_data_elems = self._iqr_example_data.get(sid, {})
            working_data = {
                uid: {
                    'content_type': elem.content_type(),
                    'bytes_base64':
                        base64.b64encode(elem.get_bytes()).decode('ascii'),
                }
                for uid, elem in sid_data_elems.items()
            }

            state_dict["working_data"] = working_data
            state_json = json.dumps(state_dict)

            z_wrapper_buffer = BytesIO()
            with zipfile.ZipFile(z_wrapper_buffer, 'w',
                                 IqrSession.STATE_ZIP_COMPRESSION) as z_wrap:
                z_wrap.writestr(IqrSession.STATE_ZIP_FILENAME, state_json)

            # Rewind so the response streams the archive from its start.
            z_wrapper_buffer.seek(0)
            return flask.send_file(
                z_wrapper_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % sid
            )

        @self.route('/set_iqr_state', methods=['PUT'])
        @self._parent_app.module_login.login_required
        def set_iqr_session_state():
            """
            Set the current state based on the given state file.

            Expects a ``fid`` form value naming a file held by the uploader
            module. The file is a ZIP state package; its optional
            ``working_data`` entry is restored into this server's session
            resources and the remaining state is forwarded to the IQR
            service.

            :return: JSON object with a boolean ``success`` flag and, for a
                rejected request, a ``message`` describing the problem.

            """
            sid = self.get_current_iqr_session()
            fid = flask.request.form.get('fid', None)

            return_obj: Dict[str, Any] = {
                'success': False,
            }

            #
            # Load in state zip package, prepare zip package for service
            #

            if fid is None:
                # Nothing to load: report the problem instead of carrying on
                # with an invalid file ID.
                return_obj['message'] = 'No file ID provided.'
                return flask.jsonify(return_obj)

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", sid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            # Load ZIP package back in, then remove the uploaded file. The
            # archive is closed before removal even when loading fails.
            try:
                with zipfile.ZipFile(
                    upload_filepath,
                    compression=IqrSession.STATE_ZIP_COMPRESSION
                ) as z:
                    with z.open(IqrSession.STATE_ZIP_FILENAME) as f:
                        state_dict = json.load(f)
            finally:
                os.remove(upload_filepath)

            #
            # Consume working data UUID/bytes
            #
            # - Dictionary of data UUID (SHA1) to {'content_type': <str>,
            #   'bytes_base64': <str>} dictionary. A state package without
            #   this entry simply carries no uploaded data.
            #: :type: dict[str, dict]
            working_data = state_dict.pop('working_data', {})

            # The UUID keys come from the uploaded file and are used as file
            # names below, so refuse any that carry path components.
            unsafe_ids = [u for u in working_data
                          if os.path.basename(u) != u]
            if unsafe_ids:
                return_obj['message'] = \
                    "Invalid data UUID(s) in state file: %s" \
                    % ', '.join(sorted(unsafe_ids))
                return flask.jsonify(return_obj)

            # Reset this server's resources for an SID
            self.reset_session_local(sid)

            # - Write out base64-decoded files to session-specific work
            #   directory.
            # - Update self._iqr_example_data with DataFileElement instances
            #   referencing the just-written files.
            for uuid_sha1, data_info in working_data.items():
                data_mimetype = data_info['content_type']
                data_b64 = str(data_info['bytes_base64'])
                # ``guess_extension`` may yield None (as the stdlib
                # ``mimetypes`` function does for unknown types); use no
                # extension rather than a literal "None" suffix.
                data_ext = MT.guess_extension(data_mimetype) or ''
                # Output file to working directory on disk.
                data_filepath = os.path.join(
                    self._iqr_work_dirs[sid],
                    '%s%s' % (uuid_sha1, data_ext)
                )
                with open(data_filepath, 'wb') as f:
                    f.write(base64.urlsafe_b64decode(data_b64))
                # Create element reference and store it for the current session.
                data_elem = DataFileElement(data_filepath, readonly=True)
                self._iqr_example_data[sid][uuid_sha1] = data_elem

            #
            # Re-package service state as a ZIP payload.
            #
            service_zip_buffer = BytesIO()
            with zipfile.ZipFile(service_zip_buffer, 'w',
                                 IqrSession.STATE_ZIP_COMPRESSION) as svc_zip:
                svc_zip.writestr(IqrSession.STATE_ZIP_FILENAME,
                                 json.dumps(state_dict))
            service_zip_base64 = \
                base64.b64encode(service_zip_buffer.getvalue())

            # Update service state, surfacing service-side HTTP errors the
            # same way the GET-based endpoints of this app do.
            put_r = self._iqr_service.put('state',
                                          sid=sid,
                                          state_base64=service_zip_base64)
            put_r.raise_for_status()

            return_obj['success'] = True
            return flask.jsonify(return_obj)

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            Return JSON:
                success
                    Always True if the message returns.

            """
            # Requesting the current IQR session ensures that one has been
            # constructed for the current web session; the returned session
            # ID itself is not needed here.
            self.get_current_iqr_session()
            payload = {"success": True}
            return flask.jsonify(payload)

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return preview and static-file link information for the data
            element associated with the given UID.

            The element is looked up first in the configured data set and,
            failing that, in the current session's example data.

            Request arguments:
                uid
                    UID of the data element to describe.

            Return JSON:
                success
                    False when the UID is found in neither data source.
                message
                    Failure description, otherwise null.
                shape
                    ``(width, height)`` of the preview image.
                static_file_link
                    Link to the statically hosted copy of the data.
                static_preview_link
                    Link to the statically hosted preview image.
            """
            uid = flask.request.args['uid']

            info: Dict[str, Any] = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                sid = self.get_current_iqr_session()
                #: :type: smqtk.representation.DataElement | None
                de = self._iqr_example_data[sid].get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID '%s' not part of the base or working " \
                                  "data set!" % uid
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                # Only the image dimensions are needed; use a context manager
                # so the underlying file handle is released right away
                # instead of leaking one per request.
                with PIL.Image.open(preview_path) as img:
                    info["shape"] = img.size

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de.uuid()] = de

                # Need to format links by transforming the generated paths to
                # something usable by the webpage: make them relative to the
                # static directory, then prepend the known static URL prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de.uuid()],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Pull an uploaded file (identified by the ``fid`` form field) from
            the uploader module into the current session's work directory,
            record it as session example data and submit it to the IQR
            service as an external positive.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            fid = flask.request.form['fid']
            sid = self.get_current_iqr_session()
            log = self._log

            log.debug("[%s::%s] Getting temporary filepath from uploader "
                      "module", sid, fid)
            uploaded_path = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            log.debug("[%s::%s] Moving uploaded file", sid, fid)
            session_path = osp.join(self._iqr_work_dirs[sid],
                                    osp.basename(uploaded_path))
            os.rename(uploaded_path, session_path)

            # Remember the uploaded element as example data of this session.
            example_elem = DataFileElement(session_path)
            uuid = example_elem.uuid()
            self._iqr_example_data[sid][uuid] = example_elem

            # Hand the new example over to the IQR service.
            log.debug("[%s::%s] Adding new data to session external positives",
                      sid, fid)
            encoded_bytes = base64.b64encode(example_elem.get_bytes())
            content_type = example_elem.content_type()
            service_r = self._iqr_service.post(
                'add_external_pos',
                sid=sid,
                base64=encoded_bytes,
                content_type=content_type,
            )
            service_r.raise_for_status()

            return str(uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Ask the IQR service to (re)initialize the working index of the
            current session and relay the service's JSON response.
            """
            session_id = self.get_current_iqr_session()

            service_r = self._iqr_service.post('initialize', sid=session_id)
            service_r.raise_for_status()
            service_payload = service_r.json()

            return flask.jsonify(service_payload)

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Report the positive/negative adjudication status of the element
            identified by the ``uid`` request argument, as given by the IQR
            service for the current session.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            # TODO: Collapse example and index adjudication endpoints.
            uid = flask.request.args['uid']
            session_id = self.get_current_iqr_session()

            service_r = self._iqr_service.get('adjudicate',
                                              sid=session_id, uid=uid)
            service_r.raise_for_status()
            status = service_r.json()

            # Relay only the two adjudication flags.
            return flask.jsonify(is_pos=status['is_pos'],
                                 is_neg=status['is_neg'])

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Report the adjudication status of a data/descriptor element,
            identified by the ``uid`` request argument, as given by the IQR
            service for the current session.

            At most one of the two returned flags is expected to be True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            # TODO: Collapse example and index adjudication endpoints.
            uid = flask.request.args['uid']
            session_id = self.get_current_iqr_session()

            service_r = self._iqr_service.get('adjudicate',
                                              sid=session_id, uid=uid)
            service_r.raise_for_status()
            status = service_r.json()

            # Relay only the two adjudication flags.
            return flask.jsonify(is_pos=status['is_pos'],
                                 is_neg=status['is_neg'])

        @self.route("/adjudicate", methods=["POST"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Forward adjudication changes for the current session to the IQR
            service.

            The form fields ``add_pos``, ``remove_pos``, ``add_neg`` and
            ``remove_neg`` each hold a JSON list of UIDs (a missing field
            counts as an empty list). Removed UIDs of either kind are sent to
            the service as "neutral".

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            form = flask.request.form

            def uid_list(field):
                # Absent fields default to an empty JSON list.
                return json.loads(form.get(field, '[]'))

            pos_to_add = uid_list('add_pos')
            pos_to_remove = uid_list('remove_pos')
            neg_to_add = uid_list('add_neg')
            neg_to_remove = uid_list('remove_neg')

            msg = "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} " % (
                pos_to_add, pos_to_remove, neg_to_add, neg_to_remove,
            )
            self._log.debug(msg)

            sid = self.get_current_iqr_session()

            # Anything removed from either label set becomes neutral.
            to_neutral = list(set(pos_to_remove) | set(neg_to_remove))

            service_r = self._iqr_service.post(
                'adjudicate',
                sid=sid,
                pos=json.dumps(pos_to_add),
                neg=json.dumps(neg_to_add),
                neutral=json.dumps(to_neutral),
            )
            service_r.raise_for_status()

            return flask.jsonify(success=True, message=msg)

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Trigger a refinement of the current IQR session on the IQR
            service, which updates the result ranking for display.

            A non-success HTTP status from the service is raised via
            ``raise_for_status``.

            """
            session_id = self.get_current_iqr_session()
            service_r = self._iqr_service.post('refine', sid=session_id)
            service_r.raise_for_status()
            return flask.jsonify(success=True,
                                 message="Completed refinement")

        @self.route("/iqr_ordered_results", methods=['GET'])
        @self._parent_app.module_login.login_required
        def get_ordered_results():
            """
            Relay the IQR service's ordered (UID, probability) pairs for the
            current session, optionally limited to the index range [i, j).

            The optional ``i`` and ``j`` request arguments are converted to
            integers and only forwarded to the service when present.

            Return format:
            {
                results: [ (uid, probability), ... ]
            }
            """
            query = flask.request.args
            bounds = (('i', query.get('i', None)),
                      ('j', query.get('j', None)))

            service_kwargs = {'sid': self.get_current_iqr_session()}
            for key, raw_value in bounds:
                if raw_value is not None:
                    service_kwargs[key] = int(raw_value)

            service_r = self._iqr_service.get('get_results', **service_kwargs)
            service_r.raise_for_status()
            return flask.jsonify(service_r.json())

        @self.route("/reset_iqr_session", methods=["POST"])
        @self._parent_app.module_login.login_required
        def reset_iqr_session():
            """
            Reset the current IQR session, first on the IQR service and then
            in this server's local per-session resources.
            """
            session_id = self.get_current_iqr_session()

            # Service-side reset.
            service_r = self._iqr_service.put('session', sid=session_id)
            service_r.raise_for_status()

            # Local work directory / example data reset.
            self.reset_session_local(session_id)

            return flask.jsonify(success=True)

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return every UID of the configured data set (NOT the
            descriptor/NNI set) in a randomly shuffled order.

            This assumes the searchable nearest neighbor index covers at least
            this set of data.

            :return: {
                    uids: list[str]
                }
            """
            shuffled_uids = list(self._data_set.uuids())
            # In-place shuffle of our private copy of the UID listing.
            random.shuffle(shuffled_uids)
            return flask.jsonify(uids=shuffled_uids)

        @self.route('/is_ready')
        def is_ready():
            """ Liveness probe: always answers ``{"alive": true}``. """
            return flask.jsonify(alive=True)

    def __del__(self):
        """
        Best-effort removal of every per-session work directory when this
        instance is finalized.

        A finalizer can run on an instance whose ``__init__`` did not finish,
        so the work-directory mapping is looked up defensively. Removal errors
        are ignored so that one problematic directory does not prevent the
        cleanup of the remaining ones.
        """
        work_dirs = getattr(self, '_iqr_work_dirs', None) or {}
        for wdir in list(work_dirs.values()):
            if os.path.isdir(wdir):
                shutil.rmtree(wdir, ignore_errors=True)

    def get_config(self):
        """
        :return: Configuration dictionary holding the IQR service URL, the
            configured working directory and the data set's configuration.
        :rtype: dict
        """
        config = dict(
            iqr_service_url=self._iqr_service.url,
            working_directory=self._working_dir,
        )
        config['data_set'] = to_config_dict(self._data_set)
        return config

    @property
    def work_dir(self):
        """
        :return: Common work directory for this instance as an absolute path,
            with a leading ``~`` / ``~user`` component expanded.
        :rtype: str
        """
        # Expand the user component *before* making the path absolute:
        # ``abspath("~/x")`` yields ``<cwd>/~/x``, after which ``expanduser``
        # no longer sees a leading ``~`` and leaves the path unexpanded.
        return osp.abspath(osp.expanduser(self._working_dir))

    def get_current_iqr_session(self):
        """
        Get the current IQR Session UUID, creating the session on the
        configured IQR service and the local per-session state when either is
        missing.

        :return: String form of the current flask session's ID.
        :rtype: str

        """
        session_id = str(flask.session.sid)

        # Make sure the service knows about this session, creating it there
        # when it is not listed.
        listing_r = self._iqr_service.get('session_ids')
        listing_r.raise_for_status()
        known_to_service = session_id in listing_r.json()['session_uuids']
        if not known_to_service:
            create_r = self._iqr_service.post('session', sid=session_id)
            create_r.raise_for_status()

        # (Re)build local state when the service session was just created or
        # when nothing has been recorded locally for this ID yet.
        if not known_to_service or session_id not in self._iqr_work_dirs:
            session_dir = osp.join(self.work_dir, session_id)
            self._iqr_work_dirs[session_id] = session_dir
            self._iqr_example_data[session_id] = {}
            safe_create_dir(session_dir)

        return session_id

    def reset_session_local(self, sid):
        """
        Reset this server's local resources for the given session ID: the
        session work directory is emptied and the recorded example data is
        cleared.

        The ``sid`` must already be known locally, which is what
        ``get_current_iqr_session`` takes care of. The linked IQR service is
        not touched.

        :param sid: Session ID to reset for.
        :type sid: str

        :raises KeyError: ``sid`` not recognized. Probably not initialized
            first.

        """
        session_dir = self._iqr_work_dirs[sid]

        # Drop the whole directory tree, then recreate it empty.
        if os.path.isdir(session_dir):
            shutil.rmtree(session_dir)
        safe_create_dir(session_dir)

        example_data = self._iqr_example_data[sid]
        example_data.clear()
示例#6
0
class IqrSearch(SmqtkObject, flask.Flask, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """

    # TODO: User access white/black-list? See ``search_app/__init__.py``:L135

    @classmethod
    def get_default_config(cls):
        """
        Build the default configuration dictionary for this class.

        Starts from the parent implementation's defaults, drops the
        ``parent_app`` slot (it is passed explicitly at construction time)
        and fills in plugin configuration blocks for the nested components.

        :return: Default configuration dictionary.
        :rtype: dict
        """
        config = super(IqrSearch, cls).get_default_config()

        # ``parent_app`` is specified explicitly later, not via configuration.
        config.pop('parent_app')

        # Plugin configuration blocks for the pluggable components.
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, impl_getter in plugin_slots:
            config[slot] = plugin.make_config(impl_getter())

        # Relevancy index: plugin defaults overlaid with any preset value.
        rel_index_cfg = plugin.make_config(get_relevancy_index_impls())
        preset_rel_index = config['rel_index_config']
        if preset_rel_index:
            rel_index_cfg.update(preset_rel_index)
        config['rel_index_config'] = rel_index_cfg

        # Descriptor factory: factory defaults overlaid with the preset
        # factory instance's own configuration, when one is set.
        factory_cfg = DescriptorElementFactory.get_default_config()
        preset_factory = config['descriptor_factory']
        if preset_factory:
            factory_cfg.update(preset_factory.get_config())
        config['descriptor_factory'] = factory_cfg

        return config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Construct an instance from a JSON-compliant configuration dictionary.

        The given configuration is laid over the class defaults, after which
        the nested plugin/factory configurations are turned into instances.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        params = cls.get_default_config()
        params.update(config)

        # Replace nested plugin configurations by constructed instances.
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, impl_getter in plugin_slots:
            params[slot] = plugin.from_plugin_config(params[slot],
                                                     impl_getter())

        factory_config = params['descriptor_factory']
        params['descriptor_factory'] = \
            DescriptorElementFactory.from_config(factory_config)

        return cls(parent_app, **params)

    def __init__(self,
                 parent_app,
                 data_set,
                 descr_generator,
                 nn_index,
                 working_directory,
                 rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_DESCRIPTOR_FACTORY,
                 pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.IqrSearchDispatcher

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator:
            smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.NearestNeighborsIndex

        :param rel_index_config: Plugin configuration for the RelevancyIndex to
            use.
        :type rel_index_config: dict

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IqrSearch, self).__init__(
            import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name,
                                        parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)
        self.register_blueprint(parent_app.module_login)

        # IQR Session control and resources
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()
        # Mapping of session IDs to their work directory
        #: :type: dict[collections.Hashable, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}
        # Descriptors of example data
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DescriptorElement]]
        self._iqr_example_pos_descr = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(
            osp.join(self._static_data_dir, "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the IQR search index page with the module name and the
            uploader URLs.
            """
            # Leading '/' is stripped from the blueprint module URLs so that
            # the paths are relative to our base.
            module_name = self.name
            uploader_url = self.mod_upload.url_prefix.lstrip('/')
            uploader_post_url = self.mod_upload.upload_post_url().lstrip('/')

            self._log.debug("Uploader URL: %s", uploader_url)
            # noinspection PyUnresolvedReferences
            return flask.render_template(
                "iqr_search_index.html",
                module_name=module_name,
                uploader_url=uploader_url,
                uploader_post_url=uploader_post_url,
            )

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session: its UUID, the
            configured algorithm types, adjudicated and example UIDs, and the
            working index size.
            """
            with self.get_current_iqr_session() as iqrs:
                session_uuid = iqrs.uuid
                descriptor_type = self._descriptor_generator.name
                nn_index_type = self._nn_index.name
                relevancy_index_type = self._rel_index_config['type']

                positive_uids = tuple(
                    d.uuid() for d in iqrs.positive_descriptors)
                negative_uids = tuple(
                    d.uuid() for d in iqrs.negative_descriptors)

                # UUIDs of example positive descriptors
                example_positive = \
                    tuple(self._iqr_example_pos_descr[session_uuid])

                index_size = iqrs.working_index.count()

                # noinspection PyProtectedMember
                return flask.jsonify(
                    uuid=session_uuid,
                    descriptor_type=descriptor_type,
                    nn_index_type=nn_index_type,
                    relevancy_index_type=relevancy_index_type,
                    positive_uids=positive_uids,
                    negative_uids=negative_uids,
                    ex_pos=example_positive,
                    ex_neg=(),  # No user negative examples supported yet
                    initialized=index_size > 0,
                    index_size=index_size,
                )

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Get IQR session state information composed of positive and negative
            descriptor vectors.

            The response is a ZIP attachment named ``<session uuid>.IqrState``
            containing a single member, named after the session UUID, whose
            content is a JSON object with ``pos`` and ``neg`` lists of
            de-duplicated descriptor vectors.
            """
            # Function-scope import so this block does not depend on which
            # buffer classes the module imports at the top.
            import io

            def vector_of(descr):
                # Tuples are hashable, which allows set-based de-duplication.
                return tuple(descr.vector().tolist())

            with self.get_current_iqr_session() as iqrs:
                iqrs_uuid = str(iqrs.uuid)
                pos_elements = list(set(
                    # Pos user examples
                    [vector_of(d) for d in
                     self._iqr_example_pos_descr[iqrs.uuid].values()] +
                    # Adjudicated examples
                    [vector_of(d) for d in iqrs.positive_descriptors]
                ))
                neg_elements = list(set(
                    # No negative user example support yet
                    # Adjudicated examples
                    [vector_of(d) for d in iqrs.negative_descriptors]
                ))

            # ZIP archives are binary data: a bytes buffer is required, since
            # a text buffer rejects the bytes zipfile writes on Python 3.
            z_buffer = io.BytesIO()
            with zipfile.ZipFile(z_buffer, 'w', zipfile.ZIP_DEFLATED) as z:
                z.writestr(
                    iqrs_uuid,
                    json.dumps({
                        'pos': pos_elements,
                        'neg': neg_elements,
                    }))

            # Rewind so the whole archive is sent.
            z_buffer.seek(0)

            return flask.send_file(
                z_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % iqrs_uuid,
            )

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Ensure an IQR session exists for the requesting web session.

            :rtype: {
                    success: bool
                }
            """
            # Entering the current session's context ensures that one has
            # been constructed for the current web session.
            with self.get_current_iqr_session():
                response = flask.jsonify(success=True)
            return response

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return preview and static-file link information for the data
            element associated with the given UID.

            The element is looked up first in the configured data set and,
            failing that, in the current session's example data. The JSON
            response carries ``success``, ``message``, ``shape`` (preview
            ``(width, height)``), ``static_file_link`` and
            ``static_preview_link``.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    #: :type: smqtk.representation.DataElement | None
                    de = self._iqr_example_data[iqrs.uuid].get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                # Only the image dimensions are needed; use a context manager
                # so the underlying file handle is released right away
                # instead of leaking one per request.
                with PIL.Image.open(preview_path) as img:
                    info["shape"] = img.size

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de.uuid()] = de

                # Need to format links by transforming the generated paths to
                # something usable by the webpage: make them relative to the
                # static directory, then prepend the known static URL prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de.uuid()],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Pull an uploaded file (identified by the ``fid`` form field) from
            the uploader module into the current session's work directory,
            compute its descriptor and adjudicate it as a positive example.

            Responds with ``"Input Error: ..."`` and status 400 when
            descriptor computation raises a ``ValueError``.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Only a POST starts the ingest of a FID.
            if flask.request.method != "POST":
                return None

            with self.get_current_iqr_session() as iqrs:
                fid = flask.request.form['fid']
                log = self._log

                log.debug("[%s::%s] Getting temporary filepath from uploader "
                          "module", iqrs.uuid, fid)
                uploaded_path = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                log.debug("[%s::%s] Moving uploaded file", iqrs.uuid, fid)
                session_path = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                        osp.basename(uploaded_path))
                os.rename(uploaded_path, session_path)

                # Remember the uploaded element as session example data.
                example_elem = DataFileElement(session_path)
                uuid = example_elem.uuid()
                self._iqr_example_data[iqrs.uuid][uuid] = example_elem

                # Extend session ingest -- modifying
                log.debug("[%s::%s] Adding new data to session positives",
                          iqrs.uuid, fid)
                try:
                    example_descr = \
                        self._descriptor_generator.compute_descriptor(
                            example_elem, self._descr_elem_factory
                        )
                except ValueError as ex:
                    return "Input Error: %s" % str(ex), 400

                self._iqr_example_pos_descr[iqrs.uuid][uuid] = example_descr
                iqrs.adjudicate((example_descr, ))

                return str(uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Update the current IQR session's working index from the
            configured nearest-neighbors index.

            Any exception raised on the way is reported in the JSON response
            (``success`` False, ``message`` naming the error) instead of
            propagating.
            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.update_working_index(self._nn_index)
                    return flask.jsonify(
                        success=True,
                        message="Completed initialization",
                    )
                except Exception as ex:
                    error_text = "ERROR: (%s) %s" % (type(ex).__name__,
                                                     str(ex))
                    return flask.jsonify(success=False, message=error_text)

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Report whether the element named by the ``uid`` query argument is
            among the current session's positive example descriptors.

            Negative examples are not supported here, so ``is_neg`` is always
            False.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            requested_uid = flask.request.args['uid']
            with self.get_current_iqr_session() as session:
                positive_examples = self._iqr_example_pos_descr[session.uuid]
                status = {
                    "is_pos": requested_uid in positive_examples,
                    # Currently no negative example support
                    "is_neg": False,
                }
                return flask.jsonify(status)

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Look up the adjudication state of the element named by the
            ``uid`` query argument in the current session.

            The UID is compared against the UUIDs of the session's positive
            and negative descriptors. At most one of the two flags is
            expected to be True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            requested_uid = flask.request.args['uid']
            with self.get_current_iqr_session() as session:
                positive_uids = {
                    descr.uuid() for descr in session.positive_descriptors
                }
                negative_uids = {
                    descr.uuid() for descr in session.negative_descriptors
                }
                status = {
                    "is_pos": requested_uid in positive_uids,
                    "is_neg": requested_uid in negative_uids,
                }
                return flask.jsonify(status)

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Apply adjudication changes to the current session.

            Reads four optional JSON-encoded lists of working-index UUIDs
            (``add_pos``, ``remove_pos``, ``add_neg``, ``remove_neg``) from
            the form data on POST or the query string on GET. A missing
            entry is treated as an empty list.

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            http_method = flask.request.method
            if http_method == "POST":
                params = flask.request.form
            elif http_method == "GET":
                params = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'" %
                                   flask.request.method)

            pos_to_add, pos_to_remove, neg_to_add, neg_to_remove = [
                json.loads(params.get(key, '[]'))
                for key in ('add_pos', 'remove_pos', 'add_neg', 'remove_neg')
            ]

            # The same text is logged and sent back to the client.
            summary = (
                "Adjudicated Positive{+%s, -%s}, "
                "Negative{+%s, -%s} " %
                (pos_to_add, pos_to_remove, neg_to_add, neg_to_remove))
            self._log.debug(summary)

            with self.get_current_iqr_session() as session:
                index = session.working_index
                new_positives = tuple(index.get_many_descriptors(pos_to_add))
                new_negatives = tuple(index.get_many_descriptors(neg_to_add))
                dropped_positives = tuple(
                    index.get_many_descriptors(pos_to_remove))
                dropped_negatives = tuple(
                    index.get_many_descriptors(neg_to_remove))
                session.adjudicate(new_positives, new_negatives,
                                   dropped_positives, dropped_negatives)
                self._log.debug("Now positive UUIDs: %s",
                                session.positive_descriptors)
                self._log.debug("Now negative UUIDs: %s",
                                session.negative_descriptors)

            return flask.jsonify({"success": True, "message": summary})

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Run ``refine()`` on the current IQR session so that its result
            ranking is updated for display.

            :return: JSON object with ``success`` (bool) and ``message``
                (str). Any exception raised by the refinement (for instance
                when there are no adjudications yet) is reported through
                these fields instead of being raised.

            """
            with self.get_current_iqr_session() as session:
                try:
                    session.refine()
                    payload = {
                        "success": True,
                        "message": "Completed refinement"
                    }
                    return flask.jsonify(payload)
                except Exception as err:
                    # Deliberately broad: the client expects a JSON status
                    # whatever went wrong.
                    error_text = "ERROR: (%s) %s" % (type(err).__name__,
                                                     str(err))
                    return flask.jsonify({
                        "success": False,
                        "message": error_text,
                    })

        @self.route("/iqr_ordered_results", methods=['GET'])
        @self._parent_app.module_login.login_required
        def get_ordered_results():
            """
            Return the slice ``[i, j)`` of the session's ordered
            (UID, probability) results.

            ``i`` defaults to 0 and ``j`` to the number of available results,
            both read from the query string. A ``j`` past the end simply
            yields the results that exist; the list is empty when the session
            has no results.

            Return format:
            {
                results: [ (uid, probability), ... ]
            }
            """
            with self.get_current_iqr_session() as session:
                query = flask.request.args
                start = int(query.get('i', 0))

                if session.results:
                    default_stop = len(session.results)
                else:
                    default_stop = 0
                stop = int(query.get('j', default_stop))

                #: :type: tuple[(smqtk.representation.DescriptorElement, float)]
                ranked = session.ordered_results() or ()
                page = ranked[start:stop]
                pairs = [(descr.uuid(), prob) for descr, prob in page]
                return flask.jsonify({"results": pairs})

        @self.route("/reset_iqr_session", methods=["GET"])
        @self._parent_app.module_login.login_required
        def reset_iqr_session():
            """
            Reset the current IQR session: reset the session object, replace
            its working directory with an empty one, and clear the example
            data and positive example descriptors recorded for it.

            :return: JSON object ``{success: true}``
            """
            with self.get_current_iqr_session() as session:
                session.reset()

                # Start over with an empty working directory.
                session_dir = self._iqr_work_dirs[session.uuid]
                if os.path.isdir(session_dir):
                    shutil.rmtree(session_dir)
                safe_create_dir(session_dir)

                # Forget example data + descriptors
                for registry in (self._iqr_example_data,
                                 self._iqr_example_pos_descr):
                    registry[session.uuid].clear()

                return flask.jsonify({"success": True})

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return the keys of the current session's working index in a
            random order.

            The keys are copied while the session is held; shuffling happens
            after it has been released.

            :return: {
                    uids: list of working index keys
                }
            """
            with self.get_current_iqr_session() as session:
                shuffled_uids = list(session.working_index.keys())
            random.shuffle(shuffled_uids)
            response = {"uids": shuffled_uids}
            return flask.jsonify(response)

    def __del__(self):
        """
        Remove every per-session working directory that still exists on disk.

        Directories are taken from the values of ``self._iqr_work_dirs``.
        """
        # The finalizer also runs for instances whose construction failed
        # before ``_iqr_work_dirs`` was assigned; there is nothing to clean
        # up in that case, and an AttributeError here would only produce a
        # spurious "Exception ignored in __del__" message.
        work_dirs = getattr(self, '_iqr_work_dirs', None)
        if not work_dirs:
            return
        for wdir in work_dirs.values():
            if os.path.isdir(wdir):
                shutil.rmtree(wdir)

    def get_config(self):
        """
        Describe this instance as a configuration dictionary.

        Plugin-backed members (data set, descriptor generator and
        nearest-neighbors index) are converted with
        ``plugin.to_plugin_config``; the descriptor element factory
        contributes the result of its own ``get_config()``.

        :return: Configuration dictionary for this instance.
        :rtype: dict
        """
        config = {}
        config['name'] = self.name
        config['url_prefix'] = self.url_prefix
        config['working_directory'] = self._working_dir
        config['data_set'] = plugin.to_plugin_config(self._data_set)
        config['descr_generator'] = \
            plugin.to_plugin_config(self._descriptor_generator)
        config['nn_index'] = plugin.to_plugin_config(self._nn_index)
        config['rel_index_config'] = self._rel_index_config
        config['descriptor_factory'] = self._descr_elem_factory.get_config()
        return config

    @property
    def work_dir(self):
        """
        :return: Common work directory for this instance, as an absolute
            path with a leading ``~`` expanded to the user's home directory.
        :rtype: str
        """
        # Expand ``~`` first: ``abspath`` would otherwise prepend the current
        # directory, leaving a literal ``~`` path component that
        # ``expanduser`` no longer recognizes.
        return osp.abspath(osp.expanduser(self._working_dir))

    def get_current_iqr_session(self):
        """
        Return the IQR session bound to the current flask session ID,
        creating and registering it first when the controller does not know
        that ID yet.

        Creating a session also sets up its working directory (named after
        the session ID, under ``work_dir``) and empty example-data and
        positive-example-descriptor maps.

        :rtype: smqtk.IQR.iqr_session.IqrSession

        """
        controller = self._iqr_controller
        with controller:
            session_id = flask.session.sid
            if controller.has_session_uuid(session_id):
                return controller.get_session(session_id)

            # First request for this ID: create the session and its
            # per-session bookkeeping.
            fresh = IqrSession(self._pos_seed_neighbors,
                               self._rel_index_config, session_id)
            controller.add_session(fresh)

            self._iqr_work_dirs[fresh.uuid] = \
                osp.join(self.work_dir, session_id)
            safe_create_dir(self._iqr_work_dirs[fresh.uuid])

            self._iqr_example_data[fresh.uuid] = {}
            self._iqr_example_pos_descr[fresh.uuid] = {}

            return controller.get_session(session_id)
示例#7
0
文件: iqr_search.py 项目: kod3r/SMQTK
class IqrSearch (flask.Blueprint, Configurable):
    """
    IQR Search Tab blueprint

    Components:
        * Data-set, from which base media data is provided
        * Descriptor generator, which provides descriptor generation services
          for user uploaded data.
        * NearestNeighborsIndex, from which descriptors are queried from user
          input data. This index should contain descriptors that were
          generated by the same descriptor generator configuration above (same
          dimensionality, etc.).
        * RelevancyIndex, which is populated by an initial query, and then
          iterated over within the same user session. A new instance and model
          is generated every time a new session is created (or new data is
          uploaded by the user).

    Assumes:
        * DescriptorElement related to a DataElement have the same UUIDs.

    """

    @classmethod
    def get_default_config(cls):
        """
        Build the default configuration dictionary for this class.

        Starts from the parent class's default configuration, drops the
        ``parent_app`` entry, and fills the nested plugin slots with
        ``plugin.make_config`` output. Any non-empty ``rel_index_config``
        already present is layered on top of the generated relevancy index
        configuration.

        :return: Default configuration dictionary.
        :rtype: dict
        """
        config = super(IqrSearch, cls).get_default_config()

        # Remove parent_app slot for later explicit specification.
        del config['parent_app']

        # fill in plugin configs
        plugin_slots = (
            ('data_set', get_data_set_impls),
            ('descr_generator', get_descriptor_generator_impls),
            ('nn_index', get_nn_index_impls),
        )
        for slot, impl_getter in plugin_slots:
            config[slot] = plugin.make_config(impl_getter)

        relevancy_config = plugin.make_config(get_relevancy_index_impls)
        inherited = config['rel_index_config']
        if inherited:
            relevancy_config.update(inherited)
        config['rel_index_config'] = relevancy_config

        return config

    # noinspection PyMethodOverriding
    @classmethod
    def from_config(cls, config, parent_app):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        The given dictionary is not modified: nested plugin configurations
        are instantiated into a copy.

        :param config: JSON compliant dictionary encapsulating
            a configuration.
        :type config: dict

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :return: Constructed instance from the provided config.
        :rtype: IqrSearch

        """
        # Work on a shallow copy so the caller's dictionary keeps its
        # JSON-compliant plugin configurations instead of having them
        # replaced by live instances (which would break reuse or
        # re-serialization of that dictionary).
        init_kwargs = dict(config)

        # construct nested objects via configurations
        init_kwargs['data_set'] = \
            plugin.from_plugin_config(config['data_set'],
                                      get_data_set_impls)
        init_kwargs['descr_generator'] = \
            plugin.from_plugin_config(config['descr_generator'],
                                      get_descriptor_generator_impls)
        init_kwargs['nn_index'] = \
            plugin.from_plugin_config(config['nn_index'],
                                      get_nn_index_impls)

        return cls(parent_app, **init_kwargs)

    def __init__(self, parent_app, name, data_set, descr_generator, nn_index,
                 working_directory, rel_index_config=DFLT_REL_INDEX_CONFIG,
                 url_prefix=None, pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param name: Name of this blueprint instance
        :type name: str

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator: smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: NearestNeighborsIndex

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.
        :type working_directory: str

        :param rel_index_config: Plugin configuration for the relevancy
            index. It is stored as given.
        :type rel_index_config: dict

        :param url_prefix: Web address prefix for this blueprint. A leading
            slash is added when missing; ``None`` is passed on unchanged.
        :type url_prefix: str | None

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: ``pos_seed_neighbors`` is a string that cannot be
            parsed as an integer.

        """
        # Make sure a given URL prefix starts with a slash. ``None`` is the
        # declared default and must not be dereferenced.
        if url_prefix is not None and not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the IQR search landing page.
            """
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session
            """
            with self.get_current_iqr_session() as iqrs:
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": iqrs.uuid,
                    "positive_uids":
                        tuple(d.uuid() for d in iqrs.positive_descriptors),
                    "negative_uids":
                        tuple(d.uuid() for d in iqrs.negative_descriptors),

                    "ex_pos": tuple(iqrs.ex_pos_descriptors),
                    "ex_neg": tuple(iqrs.ex_neg_descriptors),

                    "initialized": iqrs.working_index.count() > 0,
                    "index_size": iqrs.working_index.count(),
                })

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            with self.get_current_iqr_session():
                return flask.jsonify({
                    "success": True
                })

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return preview image information (shape and static links) for
            the data file associated with the given UID.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            #: :type: smqtk.representation.DataElement
            de = None
            if self._data_set.has_uuid(uid):
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    de = iqrs.ex_data.get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                img = PIL.Image.open(preview_path)
                info["shape"] = img.size

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)

                # Need to format links by transforming the generated paths to
                # something usable by webpage:
                # - make relative to the static directory, and then pre-pend
                #   the known static url prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de.uuid()],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                # Enter the session like every other route does, so that the
                # session is not modified concurrently by another request.
                with self.get_current_iqr_session() as iqr_sess:
                    fid = flask.request.form['fid']

                    self.log.debug("[%s::%s] Getting temporary filepath from "
                                   "uploader module", iqr_sess.uuid, fid)
                    upload_filepath = self.mod_upload.get_path_for_id(fid)
                    self.mod_upload.clear_completed(fid)

                    self.log.debug("[%s::%s] Moving uploaded file",
                                   iqr_sess.uuid, fid)
                    sess_upload = osp.join(iqr_sess.work_dir,
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)
                    upload_data = DataFileElement(sess_upload)
                    upload_data.uuid()

                    # Extend session ingest -- modifying
                    self.log.debug("[%s::%s] Adding new data to session "
                                   "positives", iqr_sess.uuid, fid)
                    iqr_sess.add_positive_data(upload_data)

                    return str(upload_data.uuid())

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.
            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.initialize()
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed initialization",
                    })
                # ``as`` form is valid on Python 2.6+ and Python 3; broad on
                # purpose so the client always receives a JSON status.
                except Exception as ex:
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: (%s) %s" % (type(ex).__name__,
                                                       str(ex))
                    })

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Get positive/negative status for a data/descriptor in our example
            set.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            elem_uuid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                is_p = (elem_uuid in iqrs.ex_pos_descriptors)
                is_n = (elem_uuid in iqrs.ex_neg_descriptors)

                return flask.jsonify({
                    "is_pos": is_p,
                    "is_neg": is_n,
                })

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Get the adjudication status of a particular data/descriptor element
            by UUID.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            elem_uuid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                is_p = (
                    elem_uuid in set(d.uuid() for d in iqrs.positive_descriptors)
                )
                is_n = (
                    elem_uuid in set(d.uuid() for d in iqrs.negative_descriptors)
                )

                return flask.jsonify({
                    "is_pos": is_p,
                    "is_neg": is_n,
                })

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudication for this session. This should specify UUIDs of
            data/descriptor elements in our working index.

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            if flask.request.method == "POST":
                fetch = flask.request.form
            elif flask.request.method == "GET":
                fetch = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'"
                                   % flask.request.method)

            pos_to_add = json.loads(fetch.get('add_pos', '[]'))
            pos_to_remove = json.loads(fetch.get('remove_pos', '[]'))
            neg_to_add = json.loads(fetch.get('add_neg', '[]'))
            neg_to_remove = json.loads(fetch.get('remove_neg', '[]'))

            # Arguments are handed to the logger so the message is only
            # formatted when debug logging is enabled.
            self.log.debug(
                "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} ",
                pos_to_add, pos_to_remove, neg_to_add, neg_to_remove)

            with self.get_current_iqr_session() as iqrs:
                iqrs.adjudicate(
                    tuple(iqrs.working_index.get_many_descriptors(*pos_to_add)),
                    tuple(iqrs.working_index.get_many_descriptors(*neg_to_add)),
                    tuple(iqrs.working_index.get_many_descriptors(*pos_to_remove)),
                    tuple(iqrs.working_index.get_many_descriptors(*neg_to_remove)),
                )
                self.log.debug("Now positive UUIDs: %s", iqrs.positive_descriptors)
                self.log.debug("Now negative UUIDs: %s", iqrs.negative_descriptors)

            return flask.jsonify({
                "success": True,
                "message": "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                           % (pos_to_add, pos_to_remove,
                              neg_to_add, neg_to_remove)
            })

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Classify current IQR session indexer, updating ranking for
            display.

            Fails gracefully if there are no positive[/negative] adjudications.

            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.refine()
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed refinement"
                    })
                # ``as`` form is valid on Python 2.6+ and Python 3; broad on
                # purpose so the client always receives a JSON status.
                except Exception as ex:
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: (%s) %s" % (type(ex).__name__,
                                                       str(ex))
                    })
示例#8
0
文件: iqr_search.py 项目: kod3r/SMQTK
    def __init__(self, parent_app, name, data_set, descr_generator, nn_index,
                 working_directory, rel_index_config=DFLT_REL_INDEX_CONFIG,
                 url_prefix=None, pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator: smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.nearest

        :param rel_index_config: Plugin configuration for the
        :type rel_index_config: dict

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: Invalid Descriptor or indexer type

        """
        # make sure URL prefix starts with a slash
        if not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the landing page of this IQR search module.

            The template context consists of this module's name, the uploader
            sub-module's URL prefix and upload POST URL, updated with whatever
            the parent application returns from ``nav_bar_content()``.
            """
            # Module-specific values go in first; entries coming from the
            # parent application's navigation bar content win on key clashes.
            context = dict(
                module_name=self.name,
                uploader_url=self.mod_upload.url_prefix,
                uploader_post_url=self.mod_upload.upload_post_url(),
            )
            context.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **context)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Report the state of the current IQR session as a JSON response.

            The payload carries the session UUID, the UUIDs of the session's
            positive and negative descriptors, the items obtained by iterating
            ``ex_pos_descriptors`` / ``ex_neg_descriptors``, and the working
            index size together with a flag telling whether it is non-empty.
            """
            with self.get_current_iqr_session() as iqrs:
                # Assemble the payload entry by entry inside the session's
                # context, then serialize it in one go.
                # noinspection PyProtectedMember
                payload = {"uuid": iqrs.uuid}
                payload["positive_uids"] = tuple(
                    desc.uuid() for desc in iqrs.positive_descriptors
                )
                payload["negative_uids"] = tuple(
                    desc.uuid() for desc in iqrs.negative_descriptors
                )
                payload["ex_pos"] = tuple(iqrs.ex_pos_descriptors)
                payload["ex_neg"] = tuple(iqrs.ex_neg_descriptors)
                payload["initialized"] = iqrs.working_index.count() > 0
                payload["index_size"] = iqrs.working_index.count()
                return flask.jsonify(payload)

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Confirm that an IQR session is available for the current session.

            :return: JSON response of the form ``{"success": true}``.
            :rtype: {
                    success: bool
                }
            """
            # Fetching the session is the actual work here: the getter is
            # relied upon to construct a session when none exists yet.
            with self.get_current_iqr_session():
                outcome = {"success": True}
                return flask.jsonify(outcome)

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Describe where the preview image and the raw file of a data
            element can be fetched from.

            Expects a ``uid`` request argument. The JSON response carries
            ``success``, ``message``, ``shape`` (size of the preview image) and
            the ``static_file_link`` / ``static_preview_link`` paths. When no
            element is found for the UID, ``success`` is false, ``message``
            explains why and the remaining fields are null.
            """
            uid = flask.request.args['uid']

            # Look in the indexed data set first, then fall back to the
            # example data held by the current IQR session.
            #: :type: smqtk.representation.DataElement
            if self._data_set.has_uuid(uid):
                element = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    element = iqrs.ex_data.get(uid, None)

            # Nothing found: answer right away with a failure record.
            if not element:
                return flask.jsonify({
                    "success": False,
                    "message": "UUID not part of the active data set!",
                    "shape": None,  # (width, height)
                    "static_file_link": None,
                    "static_preview_link": None,
                })

            # The preview path is expected to lie within the statically
            # hosted area.
            preview_path = self._preview_cache.get_preview_image(element)
            shape = PIL.Image.open(preview_path).size

            # Write the element's content into the static area once and
            # remember where it went.
            if element.uuid() not in self._static_cache:
                self._static_cache[element.uuid()] = \
                    element.write_temp(self._static_data_dir)

            # Turn file system paths into links usable by the web page: make
            # them relative to the static directory and prepend the static
            # URL prefix.
            link_base = self._static_data_prefix + '/'
            preview_link = link_base + os.path.relpath(preview_path,
                                                       self._static_data_dir)
            file_link = link_base + os.path.relpath(
                self._static_cache[element.uuid()], self._static_data_dir
            )

            return flask.jsonify({
                "success": True,
                "message": None,
                "shape": shape,
                "static_file_link": file_link,
                "static_preview_link": preview_link,
            })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Move a completed upload into the current IQR session's work
            directory and add it to the session as positive example data.

            The upload is identified by the ``fid`` form field, which is used
            to look up the uploaded file's path in the uploader sub-module.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Only a POST starts an ingest; anything else yields no value.
            if flask.request.method != "POST":
                return None

            iqrs = self.get_current_iqr_session()
            file_id = flask.request.form['fid']

            self.log.debug("[%s::%s] Getting temporary filepath from "
                           "uploader module", iqrs.uuid, file_id)
            src_path = self.mod_upload.get_path_for_id(file_id)
            self.mod_upload.clear_completed(file_id)

            self.log.debug("[%s::%s] Moving uploaded file",
                           iqrs.uuid, file_id)
            # Keep the file name, relocate it under the session's work dir.
            dst_path = osp.join(iqrs.work_dir, osp.basename(src_path))
            os.rename(src_path, dst_path)
            element = DataFileElement(dst_path)
            # Result intentionally unused here; the UUID is requested once
            # before the element is handed to the session.
            element.uuid()

            # Extend session ingest -- modifying
            self.log.debug("[%s::%s] Adding new data to session positives",
                           iqrs.uuid, file_id)
            iqrs.add_positive_data(element)

            return str(element.uuid())

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.

            :return: JSON response with a boolean ``success`` flag and a
                ``message``. On failure the message names the exception type
                and carries its text.

            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.initialize()
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed initialization",
                    })
                # The ``as`` binding is valid on Python 2.6+ and Python 3,
                # whereas the comma form only parses on Python 2.
                except Exception as ex:
                    # Any failure is reported to the client in the JSON body
                    # instead of propagating out of the view.
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: (%s) %s" % (type(ex).__name__,
                                                       str(ex))
                    })
示例#9
0
    def __init__(self, parent_app, name, data_set, descr_generator, nn_index,
                 working_directory, rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_MEMORY_DESCR_FACTORY,
                 url_prefix=None, pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        Besides storing the given components, this registers the static file
        host and file uploader sub-modules and defines the module's routes.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param name: Name of this blueprint instance
        :type name: str

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator: smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: NearestNeighborsIndex

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.

        :param rel_index_config: Plugin configuration dictionary, stored on
            the instance for later use (presumably for constructing relevancy
            index instances -- confirm against the code that reads
            ``_rel_index_config``).
        :type rel_index_config: dict

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint. A leading
            slash is added when missing. ``None`` (the default) is passed on
            unchanged.
        :type url_prefix: str | None

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: ``pos_seed_neighbors`` could not be converted to
            an integer.

        """
        # Make sure the URL prefix starts with a slash. The default of
        # ``None`` is passed through untouched rather than failing on the
        # ``startswith`` call.
        if url_prefix is not None and not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        # (element UUID -> path of the file written into the static area).
        self._static_cache = {}
        # Element UUID -> the data element whose file was written.
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the landing page of this IQR search module.
            """
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session
            """
            with self.get_current_iqr_session() as iqrs:
                # Query the working index size once; it feeds both the
                # "initialized" flag and the reported size.
                index_size = iqrs.working_index.count()
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": iqrs.uuid,
                    "positive_uids":
                        tuple(d.uuid() for d in iqrs.positive_descriptors),
                    "negative_uids":
                        tuple(d.uuid() for d in iqrs.negative_descriptors),

                    "ex_pos": tuple(iqrs.ex_pos_descriptors),
                    "ex_neg": tuple(iqrs.ex_neg_descriptors),

                    "initialized": index_size > 0,
                    "index_size": index_size,
                })

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            with self.get_current_iqr_session():
                return flask.jsonify({
                    "success": True
                })

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return, as JSON, the preview image size and the static links to
            the preview image and the data file associated with the given
            UID.

            Expects a ``uid`` request argument. When no element is found for
            it, ``success`` is false and ``message`` explains why.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            #: :type: smqtk.representation.DataElement
            de = None
            if self._data_set.has_uuid(uid):
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    de = iqrs.ex_data.get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                img = PIL.Image.open(preview_path)
                info["shape"] = img.size

                # Ask for the UUID once; it keys both caches below.
                de_uuid = de.uuid()
                if de_uuid not in self._static_cache:
                    self._static_cache[de_uuid] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de_uuid] = de

                # Need to format links by transforming the generated paths to
                # something usable by webpage:
                # - make relative to the static directory, and then prepend
                #   the known static URL prefix.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de_uuid],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            The upload is identified by the ``fid`` form field; the file is
            moved into the session's work directory and added to the session
            as positive data.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                # NOTE(review): unlike the other routes, the session is used
                # here without entering it via ``with`` -- confirm whether
                # that is intended.
                iqr_sess = self.get_current_iqr_session()
                fid = flask.request.form['fid']

                self.log.debug("[%s::%s] Getting temporary filepath from "
                               "uploader module", iqr_sess.uuid, fid)
                upload_filepath = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                self.log.debug("[%s::%s] Moving uploaded file",
                               iqr_sess.uuid, fid)
                sess_upload = osp.join(iqr_sess.work_dir,
                                       osp.basename(upload_filepath))
                os.rename(upload_filepath, sess_upload)
                upload_data = DataFileElement(sess_upload)
                # Return value unused; the UUID is requested once before the
                # element is handed to the session.
                upload_data.uuid()

                # Extend session ingest -- modifying
                self.log.debug("[%s::%s] Adding new data to session positives",
                               iqr_sess.uuid, fid)
                iqr_sess.add_positive_data(upload_data)

                return str(upload_data.uuid())

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.

            :return: JSON response with a boolean ``success`` flag and a
                ``message``. On failure the message names the exception type
                and carries its text.

            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.initialize()
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed initialization",
                    })
                # The ``as`` binding is valid on Python 2.6+ and Python 3,
                # whereas the comma form only parses on Python 2.
                except Exception as ex:
                    # Any failure is reported to the client in the JSON body
                    # instead of propagating out of the view.
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: (%s) %s" % (type(ex).__name__,
                                                       str(ex))
                    })