Exemplo n.º 1
0
    def get_current_iqr_session(self):
        """
        Return the IQR session tied to the active flask session, building and
        registering a fresh one first when the controller does not know the
        session ID yet.

        All work is performed inside the IQR controller's context manager.

        :return: IQR session registered under the current flask session ID.
        :rtype: smqtk.IQR.iqr_session.IqrSession

        """
        controller = self._iqr_controller
        with controller:
            session_id = flask.session.sid

            # Fast path: a session for this ID was already registered.
            if controller.has_session_uuid(session_id):
                return controller.get_session(session_id)

            # Each session works out of its own sub-directory, named by ID.
            session_dir = osp.join(self.work_dir, session_id)

            content_descriptor = \
                ContentDescriptorConfiguration.new_inst(self._fd_type_str)
            model_indexer = IndexerConfiguration.new_inst(self._idxr_type_str)

            new_session = IqrSession(session_dir, content_descriptor,
                                     model_indexer, session_id)
            controller.add_session(new_session, session_id)

            # Anything already present in the session's extension data set
            # gets described and folded into the base indexer model.
            extension_features = content_descriptor.compute_descriptor_async(
                new_session.extension_ds
            )
            model_indexer.extend_model(extension_features)

            return controller.get_session(session_id)
Exemplo n.º 2
0
def main():
    """
    Command-line entry point: generate the model for a content descriptor
    and, when an indexer label is given, for that indexer as well.

    Steps performed:
      1. Parse options and initialize logging (DEBUG level with -v).
      2. Optionally replace the base configuration with a custom JSON file.
      3. With -l/--list, log the available labels and exit with status 0.
      4. Validate the given labels, exiting with status 1 on any error. The
         data set (-d) and content descriptor (-c) labels are mandatory; the
         indexer label (-i) is optional.
      5. Generate the descriptor model over the data set, then, if an
         indexer was requested, compute descriptors for the data set and
         generate the indexer model from them.
    """
    import optparse
    import sys

    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured values in " \
        "the smqtk_config module and from the system configuration JSON file " \
        "(etc/system_config.json) unless otherwise specified by options to " \
        "this script. Specific data set used is determined by the data set " \
        "label provided (-d/--data-set)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")

    # The indexer is optional (see its help text), so it is listed with the
    # other optional flags rather than under "Required Options".
    group_optional.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")
    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messages to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    bin_utils.initialize_logging(
        logging.getLogger(),
        logging.DEBUG if opts.verbose else logging.INFO
    )
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    if opts.list:
        listings = (
            ("Available Data Sets:", DataSetConfiguration),
            ("Available ContentDescriptor types:",
             ContentDescriptorConfiguration),
            ("Available Indexer types:", IndexerConfiguration),
        )
        for title, config_type in listings:
            log.info("")
            log.info(title)
            log.info("")
            for label in config_type.available_labels():
                log.info("\t%s", label)
        log.info("")
        sys.exit(0)

    # Check given labels. Every problem is reported before exiting so the
    # user can fix all of them in one go.
    fail = False
    if not dset_label:
        log.error("No data set label provided (-d/--data-set)!")
        fail = True
    elif dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True
    if not cd_label:
        log.error("No content descriptor label provided "
                  "(-c/--content-descriptor)!")
        fail = True
    elif cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True
    if fail:
        sys.exit(1)

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform serially
        # in order to allow multiple high-level feature computation jobs, else
        # we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content descriptors that do parallel
        # processing might use multiprocessing.Pool instances, too.
        # Pools don't usually allow daemonic processes, so this custom top-level
        # pool allows worker processes to spawn pools themselves.
        fmap = descriptor.compute_descriptor_async(
            dset,
            parallel=parallel,
            pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
Exemplo n.º 3
0
    def __init__(self, name, parent_app, data_set,
                 descriptor_type, indexer_type,
                 url_prefix=None):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        Besides storing the given configuration, this sets up the uploader
        sub-module, the IQR session controller, the preview/static caches and
        registers the module's routes (defined as closures below).

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: Data set to work over
        :type data_set: SMQTK.data_rep.DataSet

        :param descriptor_type: Feature Descriptor type string
        :type descriptor_type: str

        :param indexer_type: indexer type string
        :type indexer_type: str

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IQRSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        # Make sure that the configured descriptor/indexer types exist, as
        # well as their system configuration sections
        if descriptor_type not in ContentDescriptorConfiguration.available_labels():
            raise ValueError("'%s' not a valid descriptor type" % descriptor_type)
        if indexer_type not in IndexerConfiguration.available_labels():
            raise ValueError("'%s' not a valid indexer type" % indexer_type)

        self._parent_app = parent_app
        self._data_set = data_set
        self._fd_type_str = descriptor_type
        self._idxr_type_str = indexer_type

        # UIDs flagged as explicit. Every access to the set must hold the
        # accompanying lock.
        self._explicit_uids = set()
        self._explicit_uids_lock = multiprocessing.RLock()
        # TODO: Read in dict from save file

        # Uploader Sub-Module
        # NOTE(review): ``self.work_dir`` is not assigned in this method; it
        # is presumably provided elsewhere on the class -- confirm.
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database. Create web-specific
        #       IqrSession class that stores/gets its state directly from
        #       database.
        self._iqr_controller = IqrController()

        # structures for session ingest progress
        # Two levels: SID -> FID
        self._ingest_progress_locks = {}
        self._ingest_progress = {}

        # Preview Image Caching
        # TODO: Initialize this into static directory that is being served.
        self._preview_cache = PreviewCache(osp.join(self.work_dir, "Previews"))

        # Directory to write data for static viewing
        self._static_data_dir = os.path.join(self.static_folder, 'tmp_data')
        # Cache mapping of written static files for data elements
        self._static_cache = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            """
            Render the module's index page.
            """
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IQR session
            """
            with self.get_current_iqr_session() as iqrs:
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": iqrs.uuid,
                    "positive_uids": tuple(iqrs.positive_ids),
                    "negative_uids": tuple(iqrs.negative_ids),
                    "extension_ingest_contents":
                        dict((uid, str(df))
                             for uid, df in iqrs.extension_ds.iteritems()),
                    "FeatureMemory": {
                    }
                })

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            with self.get_current_iqr_session():
                return flask.jsonify({
                    "success": True
                })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            :return: status message
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                iqr_sess = self.get_current_iqr_session()
                fid = flask.request.form['fid']

                self.log.debug("[%s::%s] Getting temporary filepath from "
                               "uploader module", iqr_sess.uuid, fid)
                upload_filepath = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                # Extend session ingest -- modifying
                with iqr_sess:
                    self.log.debug("[%s::%s] Adding new file to extension "
                                   "ingest", iqr_sess.uuid, fid)
                    sess_upload = osp.join(iqr_sess.work_dir,
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)
                    upload_data = DataFileElement(sess_upload)
                    iqr_sess.extension_ds.add_data(upload_data)

                # Compute feature for data -- non-modifying
                self.log.debug("[%s::%s] Computing feature for file",
                               iqr_sess.uuid, fid)
                feat = iqr_sess.descriptor.compute_descriptor(upload_data)

                # Extend indexer model with feature data -- modifying
                with iqr_sess:
                    self.log.debug("[%s::%s] Extending indexer model with "
                                   "feature", iqr_sess.uuid, fid)
                    iqr_sess.indexer.extend_model({upload_data.uuid(): feat})

                    # of course, add the new data element as a positive
                    iqr_sess.adjudicate((upload_data.uuid(),))

                return "Finished Ingestion"

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudication for this session

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            if flask.request.method == "POST":
                fetch = flask.request.form
            elif flask.request.method == "GET":
                fetch = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'"
                                   % flask.request.method)

            # Each parameter is a JSON-encoded list; a missing one is treated
            # as an empty list.
            pos_to_add = json.loads(fetch.get('add_pos', '[]'))
            pos_to_remove = json.loads(fetch.get('remove_pos', '[]'))
            neg_to_add = json.loads(fetch.get('add_neg', '[]'))
            neg_to_remove = json.loads(fetch.get('remove_neg', '[]'))

            self.log.debug("Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                           % (pos_to_add, pos_to_remove,
                              neg_to_add, neg_to_remove))

            with self.get_current_iqr_session() as iqrs:
                iqrs.adjudicate(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
            return flask.jsonify({
                "success": True,
                "message": "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                           % (pos_to_add, pos_to_remove,
                              neg_to_add, neg_to_remove)
            })

        @self.route("/get_item_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_adjudication():
            """
            Get the adjudication status of a particular result by ingest ID.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            ingest_uid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "is_pos": ingest_uid in iqrs.positive_ids,
                    "is_neg": ingest_uid in iqrs.negative_ids
                })

        @self.route("/get_positive_uids", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_positive_uids():
            """
            Get a list of the positive ingest UIDs

            :return: {
                    uids: list of <int>
                }
            """
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "uids": list(iqrs.positive_ids)
                })

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return to the client a list of all known dataset IDs but in a random
            order. If there is currently an active IQR session with elements in
            its extension ingest, then those IDs are included in the random
            list.

            :return: {
                    uids: list of int
                }
            """
            # NOTE(review): ``update`` mutates whatever ``uuids()`` returned;
            # this assumes the data set hands back a fresh set -- confirm.
            all_ids = self._data_set.uuids()
            with self.get_current_iqr_session() as iqrs:
                all_ids.update(iqrs.extension_ds.uuids())
            all_ids = list(all_ids)
            random.shuffle(all_ids)
            return flask.jsonify({
                "uids": all_ids
            })

        @self.route("/get_ingest_image_preview_data", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return the base64 preview image data for the data file associated
            with the give UID.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "is_explicit": None,
                "shape": None,  # (width, height)
                "data": None,
                "ext": None,
                "static_file_link": None,
            }

            # Look in the base data set first, then fall back to the current
            # session's extension data set.
            #: :type: smqtk.data_rep.DataElement
            de = None
            if self._data_set.has_uuid(uid):
                de = self._data_set.get_data(uid)
                with self._explicit_uids_lock:
                    info["is_explicit"] = uid in self._explicit_uids
            else:
                with self.get_current_iqr_session() as iqrs:
                    if iqrs.extension_ds.has_uuid(uid):
                        de = iqrs.extension_ds.get_data(uid)
                        # Guard the shared set here as well, matching the
                        # base data set branch above.
                        with self._explicit_uids_lock:
                            info["is_explicit"] = uid in self._explicit_uids

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # TODO: Have data-file return an HTML chunk for implementation
                #       defined visualization?
                img_path = self._preview_cache.get_preview_image(de)
                img = PIL.Image.open(img_path)
                info["shape"] = img.size
                with open(img_path, 'rb') as img_file:
                    info["data"] = base64.encodestring(img_file.read())
                info["ext"] = osp.splitext(img_path)[1].lstrip('.')

                # Write the element under the static directory once and
                # reuse the written path on later requests.
                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                info['static_file_link'] = 'static/' \
                    + os.path.relpath(self._static_cache[de.uuid()],
                                      self.static_folder)

            return flask.jsonify(info)

        @self.route("/mark_uid_explicit", methods=["POST"])
        @self._parent_app.module_login.login_required
        def mark_uid_explicit():
            """
            Mark a given UID as explicit in its containing ingest.

            :return: Success value of True if the given UID was valid and set
                as explicit in its containing ingest.
            :rtype: {
                "success": bool
            }
            """
            uid = flask.request.form['uid']
            # Mutation of the shared set must hold the same lock readers use.
            with self._explicit_uids_lock:
                self._explicit_uids.add(uid)
            # TODO: Save out dict

            return flask.jsonify({'success': True})

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Classify current IQR session indexer, updating ranking for
            display.

            Fails gracefully if there are no positive[/negative] adjudications.

            Expected Args:
            """
            pos_to_add = json.loads(flask.request.form.get('add_pos', '[]'))
            pos_to_remove = json.loads(flask.request.form.get('remove_pos', '[]'))
            neg_to_add = json.loads(flask.request.form.get('add_neg', '[]'))
            neg_to_remove = json.loads(flask.request.form.get('remove_neg', '[]'))

            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.refine(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed refinement"
                    })
                # Deliberately broad: any refinement failure is reported back
                # to the client as a JSON error instead of a server error.
                except Exception as ex:
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: %s: %s" % (type(ex).__name__,
                                                      ex.message)
                    })