def main():
    import optparse

    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured values " \
        "in the smqtk_config module and from the system configuration JSON " \
        "file (etc/system_config.json) unless otherwise specified by " \
        "options to this script. The specific data set used is determined " \
        "by the data set label provided (-d/--data-set)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")
    group_required.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")

    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified "
                                   "in the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list the available "
                                   "FeatureDetector and Indexer "
                                   "configurations.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messages to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10 * opts.verbose))
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for l in DataSetConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for l in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for l in IndexerConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        exit(0)

    # Check given labels
    fail = False
    if dset_label and dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True
    if cd_label and cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True
    if fail:
        exit(1)
    del fail

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform
        # serially in order to allow multiple high-level feature computation
        # jobs, else we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content descriptors that do parallel
        # processing may use multiprocessing.Pool instances themselves.
        # Pools don't normally allow daemonic processes, so this custom
        # top-level pool allows worker processes to spawn pools of their own.
        fmap = descriptor.compute_descriptor_async(
            dset, parallel=parallel, pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
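# For reference, a minimal sketch of the kind of pool the NonDaemonicPool
# import above likely refers to. This follows the common recipe and is an
# assumption, not necessarily SMQTK's exact implementation: standard
# multiprocessing.Pool workers are marked daemonic, and daemonic processes
# may not spawn children, so the worker process class is swapped for one
# that always reports daemon=False.
import multiprocessing
import multiprocessing.pool


class _NonDaemonicProcess(multiprocessing.Process):

    def _get_daemon(self):
        # Always report non-daemonic so workers may create their own Pools.
        return False

    def _set_daemon(self, value):
        # Ignore the Pool machinery's attempt to mark workers daemonic.
        pass

    daemon = property(_get_daemon, _set_daemon)


class NonDaemonicPool(multiprocessing.pool.Pool):
    # Pool instantiates its workers via this class attribute.
    Process = _NonDaemonicProcess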
def __init__(self, name, parent_app, data_set, descriptor_type,
             indexer_type, url_prefix=None):
    """
    Initialize a generic IQR Search module with a single descriptor and
    indexer.

    :param name: Name of this blueprint instance
    :type name: str

    :param parent_app: Parent containing flask app instance
    :type parent_app: smqtk.web.search_app.app.search_app

    :param data_set: Data set to work over
    :type data_set: SMQTK.data_rep.DataSet

    :param descriptor_type: Feature Descriptor type string
    :type descriptor_type: str

    :param indexer_type: Indexer type string
    :type indexer_type: str

    :param url_prefix: Web address prefix for this blueprint.
    :type url_prefix: str

    :raises ValueError: Invalid descriptor or indexer type

    """
    super(IQRSearch, self).__init__(
        name, import_name=__name__,
        static_folder=os.path.join(SCRIPT_DIR, "static"),
        template_folder=os.path.join(SCRIPT_DIR, "templates"),
        url_prefix=url_prefix
    )

    # Make sure that the configured descriptor/indexer types exist, as
    # well as their system configuration sections
    if descriptor_type not in ContentDescriptorConfiguration.available_labels():
        raise ValueError("'%s' is not a valid descriptor type"
                         % descriptor_type)
    if indexer_type not in IndexerConfiguration.available_labels():
        raise ValueError("'%s' is not a valid indexer type" % indexer_type)

    self._parent_app = parent_app
    self._data_set = data_set
    self._fd_type_str = descriptor_type
    self._idxr_type_str = indexer_type

    self._explicit_uids = set()
    self._explicit_uids_lock = multiprocessing.RLock()
    # TODO: Read in dict from save file

    # Uploader Sub-Module
    self.upload_work_dir = os.path.join(self.work_dir, "uploads")
    self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                    self.upload_work_dir,
                                    url_prefix='/uploader')
    self.register_blueprint(self.mod_upload)

    # IQR Session control
    # TODO: Move session management to database. Create a web-specific
    #       IqrSession class that stores/gets its state directly from the
    #       database.
    self._iqr_controller = IqrController()

    # Structures for session ingest progress
    # Two levels: SID -> FID
    self._ingest_progress_locks = {}
    self._ingest_progress = {}

    # Preview Image Caching
    # TODO: Initialize this into a static directory that is being served.
    self._preview_cache = PreviewCache(osp.join(self.work_dir, "Previews"))

    # Directory to write data for static viewing
    self._static_data_dir = os.path.join(self.static_folder, 'tmp_data')
    # Cache mapping of written static files for data elements
    self._static_cache = {}

    #
    # Routing
    #

    @self.route("/")
    @self._parent_app.module_login.login_required
    def index():
        r = {
            "module_name": self.name,
            "uploader_url": self.mod_upload.url_prefix,
            "uploader_post_url": self.mod_upload.upload_post_url(),
        }
        r.update(parent_app.nav_bar_content())
        # noinspection PyUnresolvedReferences
        return flask.render_template("iqr_search_index.html", **r)

    @self.route('/iqr_session_info', methods=["GET"])
    @self._parent_app.module_login.login_required
    def iqr_session_info():
        """
        Get information about the current IQR session
        """
        with self.get_current_iqr_session() as iqrs:
            # noinspection PyProtectedMember
            return flask.jsonify({
                "uuid": iqrs.uuid,
                "positive_uids": tuple(iqrs.positive_ids),
                "negative_uids": tuple(iqrs.negative_ids),
                "extension_ingest_contents":
                    dict((uid, str(df))
                         for uid, df in iqrs.extension_ds.iteritems()),
                "FeatureMemory": {
                }
            })

    @self.route("/check_current_iqr_session")
    @self._parent_app.module_login.login_required
    def check_current_iqr_session():
        """
        Check that the current IQR session exists and is initialized.

        :rtype: {
            success: bool
        }
        """
        # Getting the current IQR session ensures that one has been
        # constructed for the current session.
        with self.get_current_iqr_session():
            return flask.jsonify({
                "success": True
            })

    @self.route('/iqr_ingest_file', methods=['POST'])
    @self._parent_app.module_login.login_required
    def iqr_ingest_file():
        """
        Ingest the file with the given UID, getting the path from the
        uploader.

        :return: status message
        :rtype: str
        """
        # TODO: Add status dict with a "GET" method branch for getting that
        #       status information.

        # Start the ingest of a FID when POST
        if flask.request.method == "POST":
            iqr_sess = self.get_current_iqr_session()
            fid = flask.request.form['fid']

            self.log.debug("[%s::%s] Getting temporary filepath from "
                           "uploader module", iqr_sess.uuid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            # Extend session ingest -- modifying
            with iqr_sess:
                self.log.debug("[%s::%s] Adding new file to extension "
                               "ingest", iqr_sess.uuid, fid)
                sess_upload = osp.join(iqr_sess.work_dir,
                                       osp.basename(upload_filepath))
                os.rename(upload_filepath, sess_upload)
                upload_data = DataFileElement(sess_upload)
                iqr_sess.extension_ds.add_data(upload_data)

            # Compute feature for data -- non-modifying
            self.log.debug("[%s::%s] Computing feature for file",
                           iqr_sess.uuid, fid)
            feat = iqr_sess.descriptor.compute_descriptor(upload_data)

            # Extend indexer model with feature data -- modifying
            with iqr_sess:
                self.log.debug("[%s::%s] Extending indexer model with "
                               "feature", iqr_sess.uuid, fid)
                iqr_sess.indexer.extend_model({upload_data.uuid(): feat})

                # Of course, add the new data element as a positive
                iqr_sess.adjudicate((upload_data.uuid(),))

            return "Finished Ingestion"

    @self.route("/adjudicate", methods=["POST", "GET"])
    @self._parent_app.module_login.login_required
    def adjudicate():
        """
        Update adjudication for this session

        :return: {
            success: <bool>,
            message: <str>
        }
        """
        if flask.request.method == "POST":
            fetch = flask.request.form
        elif flask.request.method == "GET":
            fetch = flask.request.args
        else:
            raise RuntimeError("Invalid request method '%s'"
                               % flask.request.method)

        pos_to_add = json.loads(fetch.get('add_pos', '[]'))
        pos_to_remove = json.loads(fetch.get('remove_pos', '[]'))
        neg_to_add = json.loads(fetch.get('add_neg', '[]'))
        neg_to_remove = json.loads(fetch.get('remove_neg', '[]'))

        self.log.debug("Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s}",
                       pos_to_add, pos_to_remove, neg_to_add, neg_to_remove)

        with self.get_current_iqr_session() as iqrs:
            iqrs.adjudicate(pos_to_add, neg_to_add,
                            pos_to_remove, neg_to_remove)

        return flask.jsonify({
            "success": True,
            "message": "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s}"
                       % (pos_to_add, pos_to_remove,
                          neg_to_add, neg_to_remove)
        })

    @self.route("/get_item_adjudication", methods=["GET"])
    @self._parent_app.module_login.login_required
    def get_adjudication():
        """
        Get the adjudication status of a particular result by ingest ID.

        This should only ever return a dict where at most one of the two
        flags is labeled True.

        :return: {
            is_pos: <bool>,
            is_neg: <bool>
        }
        """
        ingest_uid = flask.request.args['uid']
        with self.get_current_iqr_session() as iqrs:
            return flask.jsonify({
                "is_pos": ingest_uid in iqrs.positive_ids,
                "is_neg": ingest_uid in iqrs.negative_ids
            })

    @self.route("/get_positive_uids", methods=["GET"])
    @self._parent_app.module_login.login_required
    def get_positive_uids():
        """
        Get a list of the positive ingest UIDs

        :return: {
            uids: list of <int>
        }
        """
        with self.get_current_iqr_session() as iqrs:
            return flask.jsonify({
                "uids": list(iqrs.positive_ids)
            })

    @self.route("/get_random_uids")
    @self._parent_app.module_login.login_required
    def get_random_uids():
        """
        Return to the client a list of all known dataset IDs, but in a
        random order. If there is currently an active IQR session with
        elements in its extension ingest, then those IDs are included in
        the random list.

        :return: {
            uids: list of int
        }
        """
        all_ids = self._data_set.uuids()
        with self.get_current_iqr_session() as iqrs:
            all_ids.update(iqrs.extension_ds.uuids())
        all_ids = list(all_ids)
        random.shuffle(all_ids)
        return flask.jsonify({
            "uids": all_ids
        })

    @self.route("/get_ingest_image_preview_data", methods=["GET"])
    @self._parent_app.module_login.login_required
    def get_ingest_item_image_rep():
        """
        Return the base64 preview image data for the data file associated
        with the given UID.
        """
        uid = flask.request.args['uid']

        info = {
            "success": True,
            "message": None,
            "is_explicit": None,
            "shape": None,  # (width, height)
            "data": None,
            "ext": None,
            "static_file_link": None,
        }

        #: :type: smqtk.data_rep.DataElement
        de = None
        if self._data_set.has_uuid(uid):
            de = self._data_set.get_data(uid)
            with self._explicit_uids_lock:
                info["is_explicit"] = uid in self._explicit_uids
        else:
            with self.get_current_iqr_session() as iqrs:
                if iqrs.extension_ds.has_uuid(uid):
                    de = iqrs.extension_ds.get_data(uid)
                    info["is_explicit"] = uid in self._explicit_uids

        if not de:
            info["success"] = False
            info["message"] = "UUID not part of the active data set!"
        else:
            # TODO: Have data-file return an HTML chunk for
            #       implementation-defined visualization?
            img_path = self._preview_cache.get_preview_image(de)
            img = PIL.Image.open(img_path)
            info["shape"] = img.size
            with open(img_path, 'rb') as img_file:
                info["data"] = base64.encodestring(img_file.read())
            info["ext"] = osp.splitext(img_path)[1].lstrip('.')

            if de.uuid() not in self._static_cache:
                self._static_cache[de.uuid()] = \
                    de.write_temp(self._static_data_dir)
            info['static_file_link'] = 'static/' \
                + os.path.relpath(self._static_cache[de.uuid()],
                                  self.static_folder)

        return flask.jsonify(info)

    @self.route("/mark_uid_explicit", methods=["POST"])
    @self._parent_app.module_login.login_required
    def mark_uid_explicit():
        """
        Mark a given UID as explicit in its containing ingest.

        :return: Success value of True if the given UID was valid and set
            as explicit in its containing ingest.
        :rtype: {
            "success": bool
        }
        """
        uid = flask.request.form['uid']
        self._explicit_uids.add(uid)
        # TODO: Save out dict

        return flask.jsonify({'success': True})

    @self.route("/iqr_refine", methods=["POST"])
    @self._parent_app.module_login.login_required
    def iqr_refine():
        """
        Refine the current IQR session's indexer, updating the ranking for
        display. Fails gracefully if there are no positive[/negative]
        adjudications.

        Expected form args: add_pos, remove_pos, add_neg, remove_neg
        (each a JSON-encoded list of UIDs)
        """
        pos_to_add = json.loads(flask.request.form.get('add_pos', '[]'))
        pos_to_remove = \
            json.loads(flask.request.form.get('remove_pos', '[]'))
        neg_to_add = json.loads(flask.request.form.get('add_neg', '[]'))
        neg_to_remove = \
            json.loads(flask.request.form.get('remove_neg', '[]'))

        with self.get_current_iqr_session() as iqrs:
            try:
                iqrs.refine(pos_to_add, neg_to_add,
                            pos_to_remove, neg_to_remove)
                return flask.jsonify({
                    "success": True,
                    "message": "Completed refinement"
                })
            except Exception, ex:
                return flask.jsonify({
                    "success": False,
                    "message": "ERROR: %s: %s"
                               % (type(ex).__name__, ex.message)
                })
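# A hypothetical client-side sketch of driving the endpoints above with the
# 'requests' library. BASE and the example UIDs are illustrative only, and
# authentication via module_login is assumed to already be handled on the
# session.
import json
import requests

BASE = "http://localhost:5000/iqr_search"  # assumed blueprint mount point
session = requests.Session()

# Adjudicate two items as positive. Each form field carries a JSON-encoded
# list, matching the json.loads() calls in adjudicate().
session.post(BASE + "/adjudicate", data={
    "add_pos": json.dumps(["uid-1", "uid-2"]),
    "remove_pos": json.dumps([]),
    "add_neg": json.dumps([]),
    "remove_neg": json.dumps([]),
})

# Trigger a refinement over the accumulated adjudications.
r = session.post(BASE + "/iqr_refine", data={})
print r.json()["message"]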
def main():
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = "Compute a feature vector for a given data file, " \
                  "outputting the generated feature vector to standard " \
                  "out, or to an output file if one was specified.\n" \
                  "\n" \
                  "An ingest configuration must be specified for the " \
                  "purpose of identifying which model files to use " \
                  "(assuming a given descriptor has/uses model files). The " \
                  "ingest configuration also informs where to put " \
                  "temporary working files."
    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-c', '--content-descriptor',
                      help='The descriptor type to use. This must be a type '
                           'available in the system configuration.')
    parser.add_option('-o', '--output-filepath',
                      help='Optional path to a file to output the feature '
                           'vector to. Otherwise the feature vector is '
                           'printed to standard out. Output is saved in '
                           'numpy binary format (.npy suffix recommended).')
    parser.add_option('-l', '--list', action='store_true', default=False,
                      help='List available descriptor types.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Print additional debugging messages. All '
                           'logging goes to standard error.')
    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_type = opts.content_descriptor
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    fd = ContentDescriptorConfiguration.new_inst(descriptor_type)
    feat = fd.compute_descriptor(data_element)

    if output_filepath:
        numpy.save(output_filepath, feat)
    else:
        # Construct the output string manually instead of relying on numpy's
        # default array formatting.
        s = []
        for f in feat:
            s.append('%15f' % f)
        print ' '.join(s)
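# A hypothetical follow-on snippet (the file name is illustrative): loading
# a vector that the script above saved via -o and inspecting it.
import numpy

vec = numpy.load('feat.npy')   # 1-D float array written by numpy.save
print vec.shape, vec.dtype     # e.g. (4096,) float64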
def __init__(self, config_filepath=None):
    super(DescriptorServiceServer, self).__init__(
        self.__class__.__name__,
        static_folder=os.path.join(SCRIPT_DIR, 'static'),
        template_folder=os.path.join(SCRIPT_DIR, 'templates')
    )

    #
    # Configuration setup
    #
    config_env_loaded = config_file_loaded = None

    # Load defaults -- these should always be present, aka base defaults
    self.config.from_object('smqtk_config')
    config_default_loaded = True

    # Load from env var if present
    if self.ENV_CONFIG in os.environ:
        self.log.info("Loading config from env var (%s)...",
                      self.ENV_CONFIG)
        self.config.from_envvar(self.ENV_CONFIG)
        config_env_loaded = True

    # Load from configuration file if given
    if config_filepath and os.path.isfile(config_filepath):
        config_file_path = \
            os.path.expanduser(os.path.abspath(config_filepath))
        self.log.info("Loading config from file (%s)...", config_file_path)
        self.config.from_pyfile(config_file_path)
        config_file_loaded = True

    self.log.debug("Config defaults loaded : %s", config_default_loaded)
    self.log.debug("Config from env loaded : %s", config_env_loaded)
    self.log.debug("Config from file loaded: %s", config_file_loaded)
    if not (config_default_loaded or config_env_loaded
            or config_file_loaded):
        raise RuntimeError("No configuration file specified for loading. "
                           "(%s=%s) (file=%s)"
                           % (self.ENV_CONFIG,
                              os.environ.get(self.ENV_CONFIG, None),
                              config_filepath))

    # Descriptor factory setup
    if self.ENV_DSS_DE_FACTORY not in os.environ:
        raise RuntimeError("Missing environment configuration variable "
                           "`%s`, which should be set to the configuration "
                           "label of the DescriptorElementFactory to use."
                           % self.ENV_DSS_DE_FACTORY)
    self.de_factory_label = os.environ.get(self.ENV_DSS_DE_FACTORY,
                                           "MemoryDescriptorFactory")
    self.log.info("Using descriptor factory: \"%s\"", self.de_factory_label)
    try:
        self.descr_elem_factory = \
            DescriptorFactoryConfiguration.new_inst(self.de_factory_label)
    except KeyError:
        raise ValueError("Invalid factory label set to %s: \"%s\""
                         % (self.ENV_DSS_DE_FACTORY,
                            self.de_factory_label))

    # Cache of ContentDescriptor instances
    self.descriptor_cache = {}
    self.descriptor_cache_lock = multiprocessing.RLock()

    #
    # Security
    #
    self.secret_key = self.config['SECRET_KEY']

    @self.route("/")
    def list_ingest_labels():
        return flask.jsonify({
            "labels": sorted(ContentDescriptorConfiguration
                             .available_labels())
        })

    @self.route("/all/content_types")
    def all_content_types():
        """
        Of the available descriptors, what content types are processable,
        and what types are associated to which available descriptor
        generator.
        """
        r = {}
        all_types = set()
        for l in ContentDescriptorConfiguration.available_labels():
            d = self.get_descriptor_inst(l)
            all_types.update(d.valid_content_types())
            r[l] = sorted(d.valid_content_types())

        return flask.jsonify({
            "all": sorted(all_types),
            "labels": r
        })

    @self.route("/all/compute/<path:uri>")
    def all_compute(uri):
        """
        Compute descriptors over the specified content for all generators
        that function over the data's content type.

        # JSON Return format
            {
                "success": <bool>

                "content_type": <str>

                "message": <str>

                "descriptors": {
                    "<label>": <list[float]>,
                    ...
                } | None

                "reference_uri": <str>
            }
        """
        message = "execution nominal"

        data_elem = None
        try:
            data_elem = self.resolve_data_element(uri)
        except ValueError, ex:
            message = "Failed URI resolution: %s" % str(ex)

        descriptors = {}
        finished_loop = False
        if data_elem:
            for l in ContentDescriptorConfiguration.available_labels():
                if data_elem.content_type() \
                        in self.get_descriptor_inst(l).valid_content_types():
                    d = None
                    try:
                        d = self.generate_descriptor(data_elem, l)
                    except RuntimeError, ex:
                        message = "Descriptor extraction failure: %s" \
                                  % str(ex)
                    except ValueError, ex:
                        message = "Data content type issue: %s" % str(ex)
                    descriptors[l] = d and d.vector().tolist()
                    finished_loop = True
            if not finished_loop:
                message = "No descriptor generators available for " \
                          "content type '%s'" % data_elem.content_type()

        # Response reconstructed from the JSON return format documented in
        # the docstring above; the exact 'success' semantics are a
        # reasonable assumption, not verbatim original code.
        return flask.jsonify({
            "success": bool(descriptors),
            "content_type": data_elem and data_elem.content_type(),
            "message": message,
            "descriptors": descriptors or None,
            "reference_uri": uri
        })
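# A hypothetical client-side sketch for the service above. The host, port,
# and file URI are illustrative, and resolve_data_element() is assumed to
# accept file:// URIs, as suggested by the <path:uri> route.
import requests

base = "http://localhost:5000"

# What descriptor labels are configured, and which content types they accept?
print requests.get(base + "/all/content_types").json()

# Compute descriptors with every compatible generator for one image.
resp = requests.get(base + "/all/compute/file:///tmp/example.png").json()
print resp["success"], resp["message"]
if resp["descriptors"]:
    for label, vec in resp["descriptors"].items():
        print label, len(vec)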
def main():
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration Labels")
    group_labels.add_option('-c', '--content-descriptor',
                            help='The descriptor type to use. This must be '
                                 'a type available in the system '
                                 'configuration.')
    group_labels.add_option('-f', '--factory-type',
                            help='The DescriptorElementFactory '
                                 'configuration to use when computing the '
                                 'descriptor. This must be a type available '
                                 'in the system configuration.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('-l', '--list', action='store_true',
                              default=False,
                              help='List available descriptor types.')
    group_optional.add_option('--overwrite', action='store_true',
                              default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was "
                                   "discovered based on the given content "
                                   "descriptor type and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output the '
                                   'feature vector to. Otherwise the '
                                   'feature vector is printed to standard '
                                   'out. Output is saved in numpy binary '
                                   'format (.npy suffix recommended).')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Print additional debugging messages. '
                                   'All logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_label = opts.content_descriptor
    factory_label = opts.factory_type
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        log.info("Available DescriptorElementFactory types:")
        log.info("")
        for df in DescriptorFactoryConfiguration.available_labels():
            log.info("\t%s", df)
        log.info("")
        exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    cd = ContentDescriptorConfiguration.new_inst(descriptor_label)
    factory = DescriptorFactoryConfiguration.new_inst(factory_label)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)

    vec = descr_elem.vector()
    if vec is None:
        log.error("Failed to generate a descriptor vector for the input "
                  "data!")
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Construct the output string manually instead of relying on numpy's
        # default array formatting.
        s = []
        # noinspection PyTypeChecker
        for f in vec:
            s.append('%15f' % f)
        print ' '.join(s)
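# Hypothetical invocations of this script (the script name and the
# configuration labels are illustrative, not taken from the source):
#
#   python compute_descriptor.py -l
#   python compute_descriptor.py -c my_descriptor -f MemoryDescriptorFactory \
#       -o out.npy input.png
#
# Unlike the earlier feature-computation script, descriptor storage here is
# mediated by the configured DescriptorElementFactory, so re-running the
# same descriptor/data combination without --overwrite is expected to reuse
# the previously computed vector rather than recompute it.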