示例#1
0
    def __init__(self, parent_app, iqr_service_url, data_set,
                 working_directory):
        """
        Initialize an IQR search web module that forwards IQR operations to
        a separate IQR service and registers this module's HTTP endpoints.

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.IqrSearchDispatcher

        :param iqr_service_url: Base URL to the IQR service to use for this
            application interface. Any trailing slashes will be stripped.
        :type iqr_service_url: str

        :param data_set: DataSet of the content described by indexed descriptors
            in the linked IQR service.
        :type data_set: smqtk.representation.DataSet

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app.
        :type working_directory: str

        """
        super(IqrSearch, self).__init__(
            import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._iqr_service = ServiceProxy(iqr_service_url.rstrip('/'))

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)
        self.register_blueprint(parent_app.module_login)

        # Mapping of session IDs to their work directory
        #: :type: dict[str, str]
        self._iqr_work_dirs: Dict[str, str] = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        self._iqr_example_data: Dict[
            str,
            Dict[Hashable, DataElement]
        ] = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(osp.join(self._static_data_dir,
                                                    "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache: Dict[Hashable, str] = {}
        self._static_cache_element: Dict[Hashable, DataElement] = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the main IQR search page.
            """
            # Stripping left '/' from blueprint modules in order to make sure
            # the paths are relative to our base.
            assert self.mod_upload.url_prefix is not None, (
                "Currently assuming the upload module has a non-None URL "
                "prefix."
            )
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix.lstrip('/'),
                "uploader_post_url":
                    self.mod_upload.upload_post_url().lstrip('/'),
            }
            self._log.debug("Uploader URL: %s", r['uploader_url'])
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session
            """
            sid = self.get_current_iqr_session()
            get_r = self._iqr_service.get('session', sid=sid)
            get_r.raise_for_status()
            return flask.jsonify(get_r.json())

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Get IQR session state information composed of positive and negative
            descriptor vectors.

            We append to the state received from the service in order to produce
            a state byte package that is compatible with the
            ``IqrSession.set_state_bytes`` method. This way state bytes received
            from this function can be directly consumed by the IQR service or
            other IqrSession instances.

            """
            sid = self.get_current_iqr_session()

            # Get the state base64 from the underlying service. The response
            # is closed even when the status check or JSON parsing fails.
            r_get = self._iqr_service.get('state', sid=sid)
            try:
                r_get.raise_for_status()
                state_b64 = r_get.json()['state_b64']
            finally:
                r_get.close()
            state_bytes = base64.b64decode(state_b64)

            # Load state dictionary from base-64 ZIP payload from service
            # - The service's JSON response carries the ZIP as base64, so it
            #   was decoded above and is read here as a ZipFile buffer.
            # - Context managers make sure both the archive and the member
            #   file handle are closed, even if JSON parsing fails.
            with zipfile.ZipFile(
                BytesIO(state_bytes), 'r', IqrSession.STATE_ZIP_COMPRESSION
            ) as z_state:
                with z_state.open(IqrSession.STATE_ZIP_FILENAME) as f:
                    state_dict = json.load(f)

            # Wrap service state with our UI state: uploaded data elements.
            # Data elements are stored as a dictionary mapping UUID to MIMETYPE
            # and data byte string.
            working_data = {}
            sid_data_elems = self._iqr_example_data.get(sid, {})
            for uid, working_elem in sid_data_elems.items():
                # Decoding base64 as ASCII knowing that
                # `base64.urlsafe_b64decode` is used later, whose doc-string
                # states that it may expect an ASCII string when not bytes.
                working_data[uid] = {
                    'content_type': working_elem.content_type(),
                    'bytes_base64':
                        base64.b64encode(working_elem.get_bytes())
                              .decode('ascii'),
                }

            state_dict["working_data"] = working_data
            state_json = json.dumps(state_dict)

            z_wrapper_buffer = BytesIO()
            with zipfile.ZipFile(
                z_wrapper_buffer, 'w', IqrSession.STATE_ZIP_COMPRESSION
            ) as z_wrapper:
                z_wrapper.writestr(IqrSession.STATE_ZIP_FILENAME, state_json)

            z_wrapper_buffer.seek(0)
            return flask.send_file(
                z_wrapper_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % sid
            )

        @self.route('/set_iqr_state', methods=['PUT'])
        @self._parent_app.module_login.login_required
        def set_iqr_session_state():
            """
            Set the current state based on the given state file.

            Expects the form field ``fid``: the uploader module's ID for a
            previously uploaded state ZIP package.

            :return: JSON object with a boolean ``success`` field and, on
                failure due to a missing file ID, a ``message`` field.
            """
            sid = self.get_current_iqr_session()
            fid = flask.request.form.get('fid', None)

            return_obj: Dict[str, Any] = {
                'success': False,
            }

            #
            # Load in state zip package, prepare zip package for service
            #

            if fid is None:
                # Nothing to load; do not fall through and ask the uploader
                # for the path of a non-existent file ID.
                return_obj['message'] = 'No file ID provided.'
                return flask.jsonify(return_obj)

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", sid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            # Load ZIP package back in, then remove the uploaded file.
            try:
                with zipfile.ZipFile(
                    upload_filepath,
                    compression=IqrSession.STATE_ZIP_COMPRESSION
                ) as z:
                    with z.open(IqrSession.STATE_ZIP_FILENAME) as f:
                        state_dict = json.load(f)
            finally:
                os.remove(upload_filepath)

            #
            # Consume working data UUID/bytes
            #
            # Reset this server's resources for an SID
            self.reset_session_local(sid)
            # - Dictionary of data UUID (SHA1) to {'content_type': <str>,
            #   'bytes_base64': <str>} dictionary.
            #: :type: dict[str, dict]
            working_data = state_dict['working_data']
            del state_dict['working_data']
            # - Write out base64-decoded files to session-specific work
            #   directory.
            # - Update self._iqr_example_data with DataFileElement instances
            #   referencing the just-written files.
            for uuid_sha1, elem_info in working_data.items():
                data_mimetype = elem_info['content_type']
                data_b64 = str(elem_info['bytes_base64'])
                # ``guess_extension`` yields None for unrecognized types;
                # use no extension rather than a literal "None" suffix.
                data_ext = MT.guess_extension(data_mimetype) or ''
                # Keys come from an uploaded (untrusted) file: keep only the
                # final path component so that a crafted key cannot direct
                # the write outside of the session work directory.
                data_filename = osp.basename('%s%s' % (uuid_sha1, data_ext))
                # Output file to working directory on disk.
                data_filepath = os.path.join(self._iqr_work_dirs[sid],
                                             data_filename)
                with open(data_filepath, 'wb') as f:
                    f.write(base64.urlsafe_b64decode(data_b64))
                # Create element reference and store it for the current session.
                data_elem = DataFileElement(data_filepath, readonly=True)
                self._iqr_example_data[sid][uuid_sha1] = data_elem

            #
            # Re-package service state as a ZIP payload.
            #
            service_zip_buffer = BytesIO()
            with zipfile.ZipFile(
                service_zip_buffer, 'w', IqrSession.STATE_ZIP_COMPRESSION
            ) as service_zip:
                service_zip.writestr(IqrSession.STATE_ZIP_FILENAME,
                                     json.dumps(state_dict))
            service_zip_base64 = \
                base64.b64encode(service_zip_buffer.getvalue())

            # Update service state, failing loudly if the service rejects it
            # (same handling as the other service-backed endpoints).
            put_r = self._iqr_service.put('state',
                                          sid=sid,
                                          state_base64=service_zip_base64)
            put_r.raise_for_status()

            return_obj['success'] = True
            return flask.jsonify(return_obj)

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            Return JSON:
                success
                    Always True if the message returns.

            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            _ = self.get_current_iqr_session()
            return flask.jsonify({
                "success": True
            })

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return static links to the preview image and the data file
            associated with the given UID (plus some other metadata).
            """
            uid = flask.request.args['uid']

            info: Dict[str, Any] = {
                "success": True,
                "message": None,
                "shape": None,  # (width, height)
                "static_file_link": None,
                "static_preview_link": None,
            }

            # Try to find a DataElement by the given UUID in our indexed data
            # or in the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                sid = self.get_current_iqr_session()
                #: :type: smqtk.representation.DataElement | None
                de = self._iqr_example_data[sid].get(uid, None)

            if not de:
                info["success"] = False
                info["message"] = "UUID '%s' not part of the base or working " \
                                  "data set!" % uid
            else:
                # Preview_path should be a path within our statically hosted
                # area.
                preview_path = self._preview_cache.get_preview_image(de)
                # Only the size is needed; release the image file handle as
                # soon as it has been read.
                with PIL.Image.open(preview_path) as img:
                    info["shape"] = img.size

                de_uuid = de.uuid()
                if de_uuid not in self._static_cache:
                    self._static_cache[de_uuid] = \
                        de.write_temp(self._static_data_dir)
                    self._static_cache_element[de_uuid] = de

                # Need to format links by transforming the generated paths to
                # something usable by webpage:
                # - make relative to the static directory, and then pre-pend
                #   the known static URL prefix to the relative path.
                info["static_preview_link"] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(preview_path, self._static_data_dir)
                info['static_file_link'] = \
                    self._static_data_prefix + '/' + \
                    os.path.relpath(self._static_cache[de_uuid],
                                    self._static_data_dir)

            return flask.jsonify(info)

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            fid = flask.request.form['fid']

            sid = self.get_current_iqr_session()

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", sid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            self._log.debug("[%s::%s] Moving uploaded file",
                            sid, fid)
            sess_upload = osp.join(self._iqr_work_dirs[sid],
                                   osp.basename(upload_filepath))
            os.rename(upload_filepath, sess_upload)

            # Record uploaded data as user example data for this session.
            upload_data = DataFileElement(sess_upload)
            uuid = upload_data.uuid()
            self._iqr_example_data[sid][uuid] = upload_data

            # Extend session ingest -- modifying
            self._log.debug("[%s::%s] Adding new data to session "
                            "external positives", sid, fid)
            data_b64 = base64.b64encode(upload_data.get_bytes())
            data_ct = upload_data.content_type()
            r = self._iqr_service.post('add_external_pos', sid=sid,
                                       base64=data_b64, content_type=data_ct)
            r.raise_for_status()

            return str(uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Initialize IQR session working index based on current positive
            examples and adjudications.
            """
            sid = self.get_current_iqr_session()

            # (Re)Initialize working index
            post_r = self._iqr_service.post('initialize', sid=sid)
            post_r.raise_for_status()

            return flask.jsonify(post_r.json())

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Get positive/negative status for a data/descriptor in our example
            set.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            # TODO: Collapse example and index adjudication endpoints.
            elem_uuid = flask.request.args['uid']
            sid = self.get_current_iqr_session()
            get_r = self._iqr_service.get('adjudicate', sid=sid, uid=elem_uuid)
            get_r.raise_for_status()
            get_r_json = get_r.json()
            return flask.jsonify({
                "is_pos": get_r_json['is_pos'],
                "is_neg": get_r_json['is_neg'],
            })

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Get the adjudication status of a particular data/descriptor element
            by UUID.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            # TODO: Collapse example and index adjudication endpoints.
            elem_uuid = flask.request.args['uid']
            sid = self.get_current_iqr_session()
            get_r = self._iqr_service.get('adjudicate', sid=sid, uid=elem_uuid)
            get_r.raise_for_status()
            get_r_json = get_r.json()
            return flask.jsonify({
                "is_pos": get_r_json['is_pos'],
                "is_neg": get_r_json['is_neg'],
            })

        @self.route("/adjudicate", methods=["POST"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudication for this session. This should specify UUIDs of
            data/descriptor elements in our working index.

            Reads the optional JSON-list form fields ``add_pos``,
            ``remove_pos``, ``add_neg`` and ``remove_neg``. Removed positives
            and negatives are both sent to the service as "neutral".

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            pos_to_add = json.loads(flask.request.form.get('add_pos', '[]'))
            pos_to_remove = json.loads(flask.request.form.get('remove_pos',
                                                              '[]'))
            neg_to_add = json.loads(flask.request.form.get('add_neg', '[]'))
            neg_to_remove = json.loads(flask.request.form.get('remove_neg',
                                                              '[]'))

            msg = "Adjudicated Positive{+%s, -%s}, " \
                  "Negative{+%s, -%s} " \
                  % (pos_to_add, pos_to_remove,
                     neg_to_add, neg_to_remove)
            self._log.debug(msg)

            sid = self.get_current_iqr_session()

            to_neutral = list(set(pos_to_remove) | set(neg_to_remove))

            post_r = self._iqr_service.post('adjudicate',
                                            sid=sid,
                                            pos=json.dumps(pos_to_add),
                                            neg=json.dumps(neg_to_add),
                                            neutral=json.dumps(to_neutral))
            post_r.raise_for_status()

            return flask.jsonify({
                "success": True,
                "message": msg
            })

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Request that the IQR service refine the current session, updating
            the ranking for display.

            """
            sid = self.get_current_iqr_session()
            post_r = self._iqr_service.post('refine', sid=sid)
            post_r.raise_for_status()
            return flask.jsonify({
                "success": True,
                "message": "Completed refinement",
            })

        @self.route("/iqr_ordered_results", methods=['GET'])
        @self._parent_app.module_login.login_required
        def get_ordered_results():
            """
            Get ordered (UID, probability) pairs in between the given indices,
            [i, j). If j is beyond the end of available results, only available
            results are returned.

            This may be empty if no refinement has yet occurred.

            Return format:
            {
                results: [ (uid, probability), ... ]
            }
            """
            i = flask.request.args.get('i', None)
            j = flask.request.args.get('j', None)

            # Bounds are optional; only forward the ones that were provided.
            params = {
                'sid': self.get_current_iqr_session(),
            }
            if i is not None:
                params['i'] = int(i)
            if j is not None:
                params['j'] = int(j)

            get_r = self._iqr_service.get('get_results', **params)
            get_r.raise_for_status()
            return flask.jsonify(get_r.json())

        @self.route("/reset_iqr_session", methods=["POST"])
        @self._parent_app.module_login.login_required
        def reset_iqr_session():
            """
            Reset the current IQR session
            """
            sid = self.get_current_iqr_session()
            # Reset service
            put_r = self._iqr_service.put('session', sid=sid)
            put_r.raise_for_status()
            # Reset local server resources
            self.reset_session_local(sid)
            return flask.jsonify({"success": True})

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return to the client a list of data/descriptor IDs available in the
            configured data set (NOT descriptor/NNI set), in shuffled order.

            Thus, we assume that the nearest neighbor index that is searchable
            is from at least this set of data.

            :return: {
                    uids: list[str]
                }
            """
            all_ids = list(self._data_set.uuids())
            random.shuffle(all_ids)
            return flask.jsonify({
                "uids": all_ids
            })

        @self.route('/is_ready')
        def is_ready():
            """ Simple 'I'm alive' endpoint """
            return flask.jsonify({
                "alive": True,
            })
示例#2
0
    def __init__(self,
                 parent_app,
                 name,
                 data_set,
                 descr_generator,
                 nn_index,
                 working_directory,
                 rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_MEMORY_DESCR_FACTORY,
                 url_prefix=None,
                 pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator:
            smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.NearestNeighborsIndex

        :param rel_index_config: Plugin configuration for the RelevancyIndex to
            use.
        :type rel_index_config: dict

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: Invalid Descriptor or indexer type

        """
        # make sure URL prefix starts with a slash
        if not url_prefix.startswith('/'):
            url_prefix = '/' + url_prefix

        super(IqrSearch,
              self).__init__(name,
                             import_name=__name__,
                             static_folder=os.path.join(SCRIPT_DIR, "static"),
                             template_folder=os.path.join(
                                 SCRIPT_DIR, "templates"),
                             url_prefix=url_prefix)

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name,
                                        parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control and resources
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()
        # Mapping of session IDs to their work directory
        #: :type: dict[collections.Hashable, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}
        # Descriptors of example data
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DescriptorElement]]
        self._iqr_example_pos_descr = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(
            osp.join(self._static_data_dir, "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IRQ session
            """
            with self.get_current_iqr_session() as iqrs:
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid":
                    iqrs.uuid,
                    "descriptor_type":
                    self._descriptor_generator.name,
                    "nn_index_type":
                    self._nn_index.name,
                    "relevancy_index_type":
                    self._rel_index_config['type'],
                    "positive_uids":
                    tuple(d.uuid() for d in iqrs.positive_descriptors),
                    "negative_uids":
                    tuple(d.uuid() for d in iqrs.negative_descriptors),

                    # UUIDs of example positive descriptors
                    "ex_pos":
                    tuple(self._iqr_example_pos_descr[iqrs.uuid]),
                    "initialized":
                    iqrs.working_index.count() > 0,
                    "index_size":
                    iqrs.working_index.count(),
                })

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Download the current IQR session's state as a zipped JSON file.

            The archive holds a single member, named by the session UUID,
            whose content is a JSON object with two keys:

            - ``pos``: de-duplicated descriptor vectors of the session's
              uploaded positive examples plus its positively adjudicated
              descriptors.
            - ``neg``: de-duplicated descriptor vectors of the session's
              negatively adjudicated descriptors.

            :return: File response offered as an attachment named
                ``<session-uuid>.IqrState``.
            """
            # Imported locally so this fix does not depend on the module's
            # import block. A zip archive is binary data: writing it into a
            # text ``StringIO`` fails on Python 3, while ``io.BytesIO`` works
            # on both Python 2 and Python 3.
            from io import BytesIO

            def as_vectors(descriptors):
                # Tuples are hashable, which lets ``set`` drop duplicates.
                return [tuple(d.vector().tolist()) for d in descriptors]

            with self.get_current_iqr_session() as iqrs:
                iqrs_uuid = str(iqrs.uuid)
                example_pos = self._iqr_example_pos_descr[iqrs.uuid].values()
                pos_elements = list(set(
                    # Pos user examples + adjudicated examples
                    as_vectors(example_pos)
                    + as_vectors(iqrs.positive_descriptors)
                ))
                # No negative user example support yet, only adjudications.
                neg_elements = list(set(as_vectors(iqrs.negative_descriptors)))

            z_buffer = BytesIO()
            # The context manager closes the archive even if writing fails.
            with zipfile.ZipFile(z_buffer, 'w', zipfile.ZIP_DEFLATED) as z:
                z.writestr(
                    iqrs_uuid,
                    json.dumps({
                        'pos': pos_elements,
                        'neg': neg_elements,
                    }))

            # Rewind so that the response streams the archive from its start.
            z_buffer.seek(0)

            return flask.send_file(
                z_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % iqrs_uuid,
            )

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Make sure an IQR session is available for the current session.

            :return: JSON object of the form ``{"success": true}``.
            :rtype: {
                    success: bool
                }
            """
            # Fetching the current IQR session is what guarantees that one
            # has been constructed; the session object itself is not needed.
            with self.get_current_iqr_session():
                payload = {"success": True}
                return flask.jsonify(payload)

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Describe the preview image and static file of a data element.

            The element is identified by the ``uid`` query argument and is
            searched for in the indexed data set first, then in the current
            session's example data.

            :return: JSON object with the keys ``success``, ``message``,
                ``shape`` (preview image ``(width, height)``),
                ``static_file_link`` and ``static_preview_link``. When no
                element is found ``success`` is false, ``message`` says so
                and the other values are null.
            """
            uid = flask.request.args['uid']

            # Indexed data takes precedence over the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    #: :type: smqtk.representation.DataElement | None
                    de = self._iqr_example_data[iqrs.uuid].get(uid)

            if not de:
                return flask.jsonify({
                    "success": False,
                    "message": "UUID not part of the active data set!",
                    "shape": None,  # (width, height)
                    "static_file_link": None,
                    "static_preview_link": None,
                })

            # The preview path should be inside our statically hosted area.
            preview_path = self._preview_cache.get_preview_image(de)
            preview_img = PIL.Image.open(preview_path)
            shape = preview_img.size

            # Write the element into the static area once and remember it.
            if de.uuid() not in self._static_cache:
                self._static_cache[de.uuid()] = \
                    de.write_temp(self._static_data_dir)
                self._static_cache_element[de.uuid()] = de

            # Links usable by the web page: make each path relative to the
            # static directory, then prepend the known static URL prefix.
            static_root = self._static_data_dir
            preview_link = '/'.join((
                self._static_data_prefix,
                os.path.relpath(preview_path, static_root),
            ))
            file_link = '/'.join((
                self._static_data_prefix,
                os.path.relpath(self._static_cache[de.uuid()], static_root),
            ))

            return flask.jsonify({
                "success": True,
                "message": None,
                "shape": shape,
                "static_file_link": file_link,
                "static_preview_link": preview_link,
            })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest an uploaded file into the current IQR session as a positive
            example.

            The file is identified by the ``fid`` form field. Its path is
            obtained from the uploader module, the file is moved into the
            session's work directory, a descriptor is computed for it and
            that descriptor is adjudicated into the session.

            :return: String of the data/descriptor element's UUID, or an
                ``"Input Error: ..."`` message with HTTP status 400 when
                descriptor computation raises a ``ValueError``.
            :rtype: str | (str, int)

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                with self.get_current_iqr_session() as iqrs:
                    fid = flask.request.form['fid']

                    self._log.debug(
                        "[%s::%s] Getting temporary filepath from "
                        "uploader module", iqrs.uuid, fid)
                    upload_filepath = self.mod_upload.get_path_for_id(fid)
                    self.mod_upload.clear_completed(fid)

                    self._log.debug("[%s::%s] Moving uploaded file", iqrs.uuid,
                                    fid)
                    sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)
                    upload_data = DataFileElement(sess_upload)
                    # Named ``upload_uuid`` to avoid shadowing the standard
                    # library ``uuid`` module name.
                    upload_uuid = upload_data.uuid()
                    self._iqr_example_data[iqrs.uuid][upload_uuid] = \
                        upload_data

                    # Extend session ingest -- modifying
                    self._log.debug(
                        "[%s::%s] Adding new data to session "
                        "positives", iqrs.uuid, fid)
                    try:
                        upload_descr = \
                            self._descriptor_generator.compute_descriptor(
                                upload_data, self._descr_elem_factory
                            )
                    # ``except X as name`` is valid on Python 2.6+ and
                    # Python 3; the ``except X, name`` form is a SyntaxError
                    # on Python 3.
                    except ValueError as ex:
                        return "Input Error: %s" % str(ex), 400

                    self._iqr_example_pos_descr[iqrs.uuid][upload_uuid] = \
                        upload_descr
                    iqrs.adjudicate((upload_descr, ))

                    return str(upload_uuid)
示例#3
0
    def __init__(self,
                 parent_app,
                 data_set,
                 descr_generator,
                 nn_index,
                 working_directory,
                 rel_index_config=DFLT_REL_INDEX_CONFIG,
                 descriptor_factory=DFLT_DESCRIPTOR_FACTORY,
                 pos_seed_neighbors=500):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.IqrSearchDispatcher

        :param data_set: DataSet instance that references indexed data.
        :type data_set: SMQTK.representation.DataSet

        :param descr_generator: DescriptorGenerator instance to use in IQR
            sessions for generating descriptors on new data.
        :type descr_generator:
            smqtk.algorithms.descriptor_generator.DescriptorGenerator

        :param nn_index: NearestNeighborsIndex instance for sessions to pull
            their review data sets from.
        :type nn_index: smqtk.algorithms.NearestNeighborsIndex

        :param rel_index_config: Plugin configuration for the RelevancyIndex to
            use.
        :type rel_index_config: dict

        :param working_directory: Directory in which to place working files.
            These may be considered temporary and may be removed between
            executions of this app. Retention of a work directory may speed
            things up in subsequent runs because of caching.

        :param descriptor_factory: DescriptorElementFactory for producing new
            DescriptorElement instances when data is uploaded to the server.
        :type descriptor_factory: DescriptorElementFactory

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :param pos_seed_neighbors: Number of neighbors to pull from the given
            ``nn_index`` for each positive exemplar when populating the working
            index, i.e. this value determines the size of the working index for
            IQR refinement. By default, we try to get 500 neighbors.

            Since there may be partial to significant overlap of near neighbors
            as a result of nn_index queries for positive exemplars, the working
            index may contain anywhere from this value's number of entries, to
            ``N*P``, where ``N`` is this value and ``P`` is the number of
            positive examples at the time of working index initialization.
        :type pos_seed_neighbors: int

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IqrSearch, self).__init__(
            import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
        )

        self._parent_app = parent_app
        self._data_set = data_set
        self._descriptor_generator = descr_generator
        self._nn_index = nn_index
        self._rel_index_config = rel_index_config
        self._descr_elem_factory = descriptor_factory

        self._pos_seed_neighbors = int(pos_seed_neighbors)

        # base directory that's transformed by the ``work_dir`` property into
        # an absolute path.
        self._working_dir = working_directory
        # Directory to put things to allow them to be statically available to
        # public users.
        self._static_data_prefix = "static/data"
        self._static_data_dir = osp.join(self.work_dir, 'static')

        # Custom static host sub-module
        self.mod_static_dir = StaticDirectoryHost('%s_static' % self.name,
                                                  self._static_data_dir,
                                                  self._static_data_prefix)
        self.register_blueprint(self.mod_static_dir)

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name,
                                        parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)
        self.register_blueprint(parent_app.module_login)

        # IQR Session control and resources
        # TODO: Move session management to database/remote?
        #       Create web-specific IqrSession class that stores/gets its state
        #       directly from database.
        self._iqr_controller = IqrController()
        # Mapping of session IDs to their work directory
        #: :type: dict[collections.Hashable, str]
        self._iqr_work_dirs = {}
        # Mapping of session ID to a dictionary of the custom example data for
        # a session (uuid -> DataElement)
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DataElement]]
        self._iqr_example_data = {}
        # Descriptors of example data
        #: :type: dict[collections.Hashable, dict[collections.Hashable, smqtk.representation.DescriptorElement]]
        self._iqr_example_pos_descr = {}

        # Preview Image Caching
        self._preview_cache = PreviewCache(
            osp.join(self._static_data_dir, "previews"))

        # Cache mapping of written static files for data elements
        self._static_cache = {}
        self._static_cache_element = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the IQR search index page.

            The template receives this module's name and the uploader
            sub-module's URL prefix and POST URL.
            """
            # Leading '/' characters are stripped from the blueprint module
            # URLs so that the paths stay relative to our base.
            uploader_url = self.mod_upload.url_prefix.lstrip('/')
            uploader_post_url = self.mod_upload.upload_post_url().lstrip('/')
            self._log.debug("Uploader URL: %s", uploader_url)
            # noinspection PyUnresolvedReferences
            return flask.render_template(
                "iqr_search_index.html",
                module_name=self.name,
                uploader_url=uploader_url,
                uploader_post_url=uploader_post_url,
            )

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session.

            :return: JSON object with the session UUID, the names of the
                descriptor generator and nearest-neighbors index, the
                relevancy index type, the UUIDs of positively/negatively
                adjudicated descriptors, the UUIDs of positive examples, and
                the working index size along with whether it is non-empty.
            """
            def uuids_of(descriptors):
                return tuple(d.uuid() for d in descriptors)

            with self.get_current_iqr_session() as iqrs:
                session_id = iqrs.uuid
                positive_uids = uuids_of(iqrs.positive_descriptors)
                negative_uids = uuids_of(iqrs.negative_descriptors)
                # UUIDs of example positive descriptors
                example_pos = tuple(self._iqr_example_pos_descr[session_id])
                index_size = iqrs.working_index.count()

                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": session_id,
                    "descriptor_type": self._descriptor_generator.name,
                    "nn_index_type": self._nn_index.name,
                    "relevancy_index_type": self._rel_index_config['type'],
                    "positive_uids": positive_uids,
                    "negative_uids": negative_uids,
                    "ex_pos": example_pos,
                    # No user negative examples supported yet
                    "ex_neg": (),
                    "initialized": index_size > 0,
                    "index_size": index_size,
                })

        @self.route('/get_iqr_state')
        @self._parent_app.module_login.login_required
        def iqr_session_state():
            """
            Download the current IQR session's state as a zipped JSON file.

            The archive holds a single member, named by the session UUID,
            whose content is a JSON object with two keys:

            - ``pos``: de-duplicated descriptor vectors of the session's
              uploaded positive examples plus its positively adjudicated
              descriptors.
            - ``neg``: de-duplicated descriptor vectors of the session's
              negatively adjudicated descriptors.

            :return: File response offered as an attachment named
                ``<session-uuid>.IqrState``.
            """
            # Imported locally so this fix does not depend on the module's
            # import block. A zip archive is binary data: writing it into a
            # text ``StringIO`` fails on Python 3, while ``io.BytesIO`` works
            # on both Python 2 and Python 3.
            from io import BytesIO

            def as_vectors(descriptors):
                # Tuples are hashable, which lets ``set`` drop duplicates.
                return [tuple(d.vector().tolist()) for d in descriptors]

            with self.get_current_iqr_session() as iqrs:
                iqrs_uuid = str(iqrs.uuid)
                example_pos = self._iqr_example_pos_descr[iqrs.uuid].values()
                pos_elements = list(set(
                    # Pos user examples + adjudicated examples
                    as_vectors(example_pos)
                    + as_vectors(iqrs.positive_descriptors)
                ))
                # No negative user example support yet, only adjudications.
                neg_elements = list(set(as_vectors(iqrs.negative_descriptors)))

            z_buffer = BytesIO()
            # The context manager closes the archive even if writing fails.
            with zipfile.ZipFile(z_buffer, 'w', zipfile.ZIP_DEFLATED) as z:
                z.writestr(
                    iqrs_uuid,
                    json.dumps({
                        'pos': pos_elements,
                        'neg': neg_elements,
                    }))

            # Rewind so that the response streams the archive from its start.
            z_buffer.seek(0)

            return flask.send_file(
                z_buffer,
                mimetype='application/octet-stream',
                as_attachment=True,
                attachment_filename="%s.IqrState" % iqrs_uuid,
            )

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Make sure an IQR session is available for the current session.

            :return: JSON object of the form ``{"success": true}``.
            :rtype: {
                    success: bool
                }
            """
            # Fetching the current IQR session is what guarantees that one
            # has been constructed; the session object itself is not needed.
            with self.get_current_iqr_session():
                payload = {"success": True}
                return flask.jsonify(payload)

        @self.route("/get_data_preview_image", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Describe the preview image and static file of a data element.

            The element is identified by the ``uid`` query argument and is
            searched for in the indexed data set first, then in the current
            session's example data.

            :return: JSON object with the keys ``success``, ``message``,
                ``shape`` (preview image ``(width, height)``),
                ``static_file_link`` and ``static_preview_link``. When no
                element is found ``success`` is false, ``message`` says so
                and the other values are null.
            """
            uid = flask.request.args['uid']

            # Indexed data takes precedence over the session's example data.
            if self._data_set.has_uuid(uid):
                #: :type: smqtk.representation.DataElement
                de = self._data_set.get_data(uid)
            else:
                with self.get_current_iqr_session() as iqrs:
                    #: :type: smqtk.representation.DataElement | None
                    de = self._iqr_example_data[iqrs.uuid].get(uid)

            if not de:
                return flask.jsonify({
                    "success": False,
                    "message": "UUID not part of the active data set!",
                    "shape": None,  # (width, height)
                    "static_file_link": None,
                    "static_preview_link": None,
                })

            # The preview path should be inside our statically hosted area.
            preview_path = self._preview_cache.get_preview_image(de)
            preview_img = PIL.Image.open(preview_path)
            shape = preview_img.size

            # Write the element into the static area once and remember it.
            if de.uuid() not in self._static_cache:
                self._static_cache[de.uuid()] = \
                    de.write_temp(self._static_data_dir)
                self._static_cache_element[de.uuid()] = de

            # Links usable by the web page: make each path relative to the
            # static directory, then prepend the known static URL prefix.
            static_root = self._static_data_dir
            preview_link = '/'.join((
                self._static_data_prefix,
                os.path.relpath(preview_path, static_root),
            ))
            file_link = '/'.join((
                self._static_data_prefix,
                os.path.relpath(self._static_cache[de.uuid()], static_root),
            ))

            return flask.jsonify({
                "success": True,
                "message": None,
                "shape": shape,
                "static_file_link": file_link,
                "static_preview_link": preview_link,
            })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest an uploaded file into the current IQR session as a positive
            example.

            The file is identified by the ``fid`` form field. Its path is
            obtained from the uploader module, the file is moved into the
            session's work directory, a descriptor is computed for it and
            that descriptor is adjudicated into the session.

            :return: String of the data/descriptor element's UUID, or an
                ``"Input Error: ..."`` message with HTTP status 400 when
                descriptor computation raises a ``ValueError``.
            :rtype: str | (str, int)

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Only a POST starts the ingest of a FID.
            if flask.request.method != "POST":
                return None

            with self.get_current_iqr_session() as iqrs:
                session_id = iqrs.uuid
                fid = flask.request.form['fid']

                self._log.debug(
                    "[%s::%s] Getting temporary filepath from "
                    "uploader module", session_id, fid)
                src_path = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                self._log.debug("[%s::%s] Moving uploaded file", session_id,
                                fid)
                dst_path = osp.join(self._iqr_work_dirs[session_id],
                                    osp.basename(src_path))
                os.rename(src_path, dst_path)
                element = DataFileElement(dst_path)
                element_uuid = element.uuid()
                self._iqr_example_data[session_id][element_uuid] = element

                # Extend session ingest -- modifying
                self._log.debug(
                    "[%s::%s] Adding new data to session "
                    "positives", session_id, fid)
                generator = self._descriptor_generator
                try:
                    descriptor = generator.compute_descriptor(
                        element, self._descr_elem_factory)
                except ValueError as ex:
                    return "Input Error: %s" % str(ex), 400

                self._iqr_example_pos_descr[session_id][element_uuid] = \
                    descriptor
                iqrs.adjudicate((descriptor, ))

                return str(element_uuid)

        @self.route("/iqr_initialize", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_initialize():
            """
            Update the current IQR session's working index from the
            configured nearest-neighbors index.

            :return: JSON object ``{success: <bool>, message: <str>}``. Any
                exception raised by the update is reported in ``message``
                with ``success`` set to false instead of propagating.
            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.update_working_index(self._nn_index)
                except Exception as ex:
                    succeeded = False
                    message = "ERROR: (%s) %s" % (type(ex).__name__, str(ex))
                else:
                    succeeded = True
                    message = "Completed initialization"
                return flask.jsonify({
                    "success": succeeded,
                    "message": message,
                })

        @self.route("/get_example_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_example_adjudication():
            """
            Report whether the element named by the ``uid`` query argument is
            a positive or negative member of the session's example set.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }

            """
            elem_uuid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                example_positives = self._iqr_example_pos_descr[iqrs.uuid]
                return flask.jsonify({
                    "is_pos": elem_uuid in example_positives,
                    # Currently no negative example support
                    "is_neg": False,
                })

        @self.route("/get_index_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_index_adjudication():
            """
            Report the adjudication status of the data/descriptor element
            named by the ``uid`` query argument.

            At most one of the two returned flags is expected to be true.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            elem_uuid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                positive_uuids = {d.uuid() for d in iqrs.positive_descriptors}
                negative_uuids = {d.uuid() for d in iqrs.negative_descriptors}
                return flask.jsonify({
                    "is_pos": elem_uuid in positive_uuids,
                    "is_neg": elem_uuid in negative_uuids,
                })

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudications for the current session.

            The request may carry the JSON-list arguments ``add_pos``,
            ``remove_pos``, ``add_neg`` and ``remove_neg`` (form fields for
            POST, query arguments for GET), each defaulting to an empty list.
            Their entries are used to look up descriptors in the session's
            working index.

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            method = flask.request.method
            if method == "POST":
                fetch = flask.request.form
            elif method == "GET":
                fetch = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'" % method)

            arg_names = ('add_pos', 'remove_pos', 'add_neg', 'remove_neg')
            pos_to_add, pos_to_remove, neg_to_add, neg_to_remove = [
                json.loads(fetch.get(arg_name, '[]'))
                for arg_name in arg_names
            ]

            # The same summary is logged now and returned to the client.
            summary = (
                "Adjudicated Positive{+%s, -%s}, "
                "Negative{+%s, -%s} " %
                (pos_to_add, pos_to_remove, neg_to_add, neg_to_remove)
            )
            self._log.debug(summary)

            with self.get_current_iqr_session() as iqrs:
                lookup = iqrs.working_index.get_many_descriptors
                iqrs.adjudicate(
                    tuple(lookup(pos_to_add)),
                    tuple(lookup(neg_to_add)),
                    tuple(lookup(pos_to_remove)),
                    tuple(lookup(neg_to_remove)),
                )
                self._log.debug("Now positive UUIDs: %s",
                                iqrs.positive_descriptors)
                self._log.debug("Now negative UUIDs: %s",
                                iqrs.negative_descriptors)

            return flask.jsonify({
                "success": True,
                "message": summary,
            })

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Run refinement on the current IQR session, updating its ranking
            for display.

            :return: JSON object ``{success: <bool>, message: <str>}``. Any
                exception raised by the refinement is reported in
                ``message`` with ``success`` set to false instead of
                propagating.

            """
            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.refine()
                except Exception as ex:
                    succeeded = False
                    message = "ERROR: (%s) %s" % (type(ex).__name__, str(ex))
                else:
                    succeeded = True
                    message = "Completed refinement"
                return flask.jsonify({
                    "success": succeeded,
                    "message": message,
                })

        @self.route("/iqr_ordered_results", methods=['GET'])
        @self._parent_app.module_login.login_required
        def get_ordered_results():
            """
            Get the ordered (UID, probability) pairs within the index range
            [i, j), taken from the ``i`` and ``j`` query arguments.

            ``i`` defaults to 0 and ``j`` to the number of session results.
            A ``j`` beyond the available results simply yields what is
            available, and the list is empty when there are no results.

            Return format:
            {
                results: [ (uid, probability), ... ]
            }
            """
            with self.get_current_iqr_session() as iqrs:
                args = flask.request.args
                start = int(args.get('i', 0))
                if iqrs.results:
                    default_stop = len(iqrs.results)
                else:
                    default_stop = 0
                stop = int(args.get('j', default_stop))
                #: :type: tuple[(smqtk.representation.DescriptorElement, float)]
                ranked = iqrs.ordered_results() or ()
                pairs = [(d.uuid(), p) for d, p in ranked[start:stop]]
                return flask.jsonify({"results": pairs})

        @self.route("/reset_iqr_session", methods=["GET"])
        @self._parent_app.module_login.login_required
        def reset_iqr_session():
            """
            Reset the current IQR session, recreate its work directory empty
            and clear its example data and descriptors.

            :return: JSON object of the form ``{"success": true}``.
            """
            with self.get_current_iqr_session() as iqrs:
                iqrs.reset()

                # Replace the session's working directory with an empty one.
                work_dir = self._iqr_work_dirs[iqrs.uuid]
                if os.path.isdir(work_dir):
                    shutil.rmtree(work_dir)
                safe_create_dir(work_dir)

                # Drop example data + descriptors
                for examples in (self._iqr_example_data,
                                 self._iqr_example_pos_descr):
                    examples[iqrs.uuid].clear()

                return flask.jsonify({"success": True})

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return the keys of the current session's working index in a
            random order.

            :return: {
                    uids: list of working index keys
                }
            """
            # Copy the keys while holding the session, shuffle afterwards.
            with self.get_current_iqr_session() as iqrs:
                shuffled_uids = list(iqrs.working_index.keys())
            random.shuffle(shuffled_uids)
            payload = {"uids": shuffled_uids}
            return flask.jsonify(payload)