def _configure(self):
    """Configure this process: snapshot config values and, unless in test
    mode, construct the descriptor factory and generator from the JSON
    configuration file named by the "config_file" config value.
    """
    # Extract the full configuration as a plain dictionary (used by tests).
    self.config_dict = {it: self.config_value(it)
                        for it in self.available_config()}

    # If we're in test mode, don't do anything that requires smqtk.
    if not apply_descriptor_test_mode:
        # Create descriptor factory backed by in-memory descriptor elements.
        self.factory = DescriptorElementFactory(DescriptorMemoryElement, {})

        # Local imports so test mode never touches smqtk.
        from smqtk.utils.jsmin import jsmin
        import json

        # Load the (comment-tolerant) JSON generator configuration.
        # Use a context manager so the file handle is closed promptly
        # (the original left it open).
        file_name = self.config_value("config_file")
        with open(file_name) as cfg_file:
            self.descr_config = json.loads(jsmin(cfg_file.read()))

        self.generator = from_plugin_config(self.descr_config,
                                            get_descriptor_generator_impls)

    self._base_configure()
#
# Input parameters
#
# The following dictionaries are JSON configurations that are used to
# configure the various data structures and algorithms needed for the IQR demo
# application. Values here can be changed to suit your specific data and
# algorithm needs.
#
# See algorithm implementation doc-strings for more information on
# configuration parameters (see implementation class ``__init__`` method).
#

search_app_config_filepath = "/Users/purg/dev/smqtk/source/python/smqtk/web/" \
                             "search_app/config.IqrSearchApp.json"
# Use a context manager so the config file handle is closed promptly
# (the original used open(...).read() and leaked the handle).
with open(search_app_config_filepath) as _config_f:
    search_app_config = json.loads(jsmin.jsmin(_config_f.read()))

# Base actions on a specific IQR tab configuration (choose index here).
search_app_iqr_config = search_app_config["iqr_tabs"][0]

# Shell glob for where input data is located.
input_image_file_glob = "/Users/purg/dev/smqtk/source/data/FileDataSets/" \
                        "example_image/images/*/*"

# Configure DataSet implementation and parameters.
data_set_config = search_app_iqr_config['data_set']

# Configure DescriptorGenerator algorithm implementation, parameters and
# persistent model component locations (if the implementation has any).
descriptor_generator_config = search_app_iqr_config['descr_generator']
def load_algo(m=smqtk.algorithms.nn_index.lsh):
    """Load an ITQ-backed LSH nearest-neighbor index from ``itq_config.json``.

    :param m: Module providing the ``LSHNearestNeighborIndex`` class
        (defaults to ``smqtk.algorithms.nn_index.lsh``).
    :return: Index instance constructed via ``from_config``.
    """
    with open("itq_config.json") as cfg_file:
        raw_json = cfg_file.read()
    # Configuration file may contain JS-style comments; strip them first.
    parsed_config = json.loads(jsmin.jsmin(raw_json))
    return m.LSHNearestNeighborIndex.from_config(parsed_config)
from smqtk.utils.bin_utils import initialize_logging
from smqtk.utils.bit_utils import bit_vector_to_int
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo

UUIDS_FILEPATH = "descriptor_uuids.all.txt"
ITQ_ROTATION = "itq.256.rotation.npy"
ITQ_MEAN_VEC = "itq.256.mean_vec.npy"

# Raw string: "\w" / "\." are regex escapes, not string escapes (the
# original non-raw literal triggers invalid-escape-sequence warnings).
fn_sha1_re = re.compile(r"\w+\.(\w+)\.vector\.npy")

# Descriptor element type label used by the factory below.
with open('descriptor_type_name.txt') as _f:
    element_type_str = _f.read().strip()

# Factory config is comment-tolerant JSON; strip comments before parsing.
with open("descriptor_factory_config.json") as _f:
    factory_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(factory_config)


#
# Multiprocessing of ITQ small-code generation
#
def make_element(uuid):
    """Create a new descriptor element of the configured type for ``uuid``."""
    return factory.new_descriptor(element_type_str, uuid)


def make_elements_from_uuids(uuids):
    """Lazily yield one descriptor element per UUID in ``uuids``."""
    for uuid in uuids:
        yield make_element(uuid)
def main():
    """Build the data/algorithm models needed by the IQR demo application
    from a JSON configuration file (see ``cli_parser`` for options).
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Load the application configuration (comment-tolerant JSON); the
    # context manager closes the handle the original leaked.
    with open(args.config) as config_f:
        search_app_config = json.loads(jsmin.jsmin(config_f.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # Base actions on a specific IQR tab configuration (choose index here).
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)
    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters.
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to, that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    # FIX: this assignment was commented out while ``rel_index_config`` is
    # still used below, which raised a NameError at runtime.
    # TODO(review): confirm 'rel_index_config' is the correct config key.
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config['descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Construct appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())
    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Add data files to DataSet.
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs.

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    try:
        nn_index.build_index(six.itervalues(data2descriptor))
    except RuntimeError:
        # Already built model, so skipping this step.
        pass
    rel_index.build_index(six.itervalues(data2descriptor))
def main():
    """Entry point: parse CLI options and launch the selected SMQTK web
    application, optionally listing available applications first.
    """
    cli = bin_utils.SMQTKOptParser()
    setup_cli(cli)
    opts, args = cli.parse_args()

    debug_smqtk = opts.debug_smqtk
    debug_server = opts.debug_server

    # Debug flags lower the effective logging thresholds.
    bin_utils.initialize_logging(logging.getLogger("smqtk"),
                                 logging.INFO - (10*debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 logging.WARN - (20*debug_server))
    log = logging.getLogger("smqtk.main")

    app_map = smqtk.web.get_web_applications()

    # Listing mode: print application labels and stop.
    if opts.list:
        log.info("")
        log.info("Available applications:")
        log.info("")
        for label in app_map:
            log.info("\t" + label)
        log.info("")
        exit(0)

    # Validate the requested application label.
    app_name = opts.application
    if app_name is None:
        log.error("No application name given!")
        exit(1)
    elif app_name not in app_map:
        log.error("Invalid application label '%s'", app_name)
        exit(1)
    app_class = app_map[app_name]

    # Output config and exit if requested.
    bin_utils.output_config(opts.output_config,
                            app_class.get_default_config(),
                            log, opts.overwrite)

    # A readable configuration file is required from here on.
    if not opts.config:
        log.error("No configuration provided")
        exit(1)
    elif not os.path.isfile(opts.config):
        log.error("Configuration file path not valid.")
        exit(1)

    with open(opts.config, 'r') as cfg_file:
        app_config = json.loads(jsmin(cfg_file.read()))

    host = opts.host
    port = opts.port and int(opts.port)
    use_reloader = opts.reload
    use_threading = opts.threaded
    use_basic_auth = opts.use_basic_auth

    # noinspection PyUnresolvedReferences
    app = app_class.from_config(app_config)
    if use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = debug_server

    app.run(host=host, port=port, debug=debug_server,
            use_reloader=use_reloader, threaded=use_threading)
def _minify(self, js):
    """Return the given JavaScript source run through the jsmin minifier."""
    minified_source = jsmin.jsmin(js)
    return minified_source
def testUnicode(self):
    # Minifying unicode input with a trailing line comment should strip
    # the comment while preserving the non-ASCII character.
    source = u'\u4000 //foo'
    result = jsmin.jsmin(source)
    self.assertEqual(result, u'\u4000')
def assertMinified(self, js_input, expected):
    """Assert that minifying ``js_input`` produces exactly ``expected``."""
    actual = jsmin.jsmin(js_input)
    assert actual == expected, "%r != %r" % (actual, expected)
def main():
    """Build the data/algorithm models needed by the IQR demo application
    from a JSON configuration file (see ``cli_parser`` for options).
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Load the application configuration (comment-tolerant JSON); the
    # context manager closes the handle the original leaked.
    with open(args.config) as config_f:
        search_app_config = json.loads(jsmin.jsmin(config_f.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # Base actions on a specific IQR tab configuration (choose index here).
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)
    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters.
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if the implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to, that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    # FIX: this assignment was commented out while ``rel_index_config`` is
    # still used below, which raised a NameError at runtime.
    # TODO(review): confirm 'rel_index_config' is the correct config key.
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config['descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Construct appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls)
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls)
    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls)

    #
    # Build models
    #
    # Add data files to DataSet.
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs.

    # Generate descriptors of data for building NN index.
    # NOTE: ``itervalues`` is Python-2-only dict API, retained here to match
    # the rest of this (apparently Python 2) script.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    try:
        nn_index.build_index(data2descriptor.itervalues())
    except RuntimeError:
        # Already built model, so skipping this step.
        pass
    rel_index.build_index(data2descriptor.itervalues())
# FIX: ``json`` is used below but was never imported in this chunk (the
# sibling version of this script imports it at the top).
import json

from smqtk.representation import DescriptorElementFactory
from smqtk.utils.bin_utils import logging, initialize_logging
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo

if not logging.getLogger().handlers:
    initialize_logging(logging.getLogger(), logging.DEBUG)
log = logging.getLogger(__name__)


log.info("Loading descriptor elements")
# Descriptor element type label used by the factory below.
with open("descriptor_type_name.txt") as _f:
    d_type_str = _f.read().strip()
# Factory config is comment-tolerant JSON; strip comments before parsing.
with open('descriptor_factory_config.json') as _f:
    df_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(df_config)

#
# Sample code for finding non-NaN descriptors in parallel
#
# def add_non_nan_uuid(uuid):
#     d = factory.new_descriptor(d_type_str, uuid)
#     if d.vector().sum() > 0:
#         return uuid
#     return None
#
# import multiprocessing
# p = multiprocessing.Pool()
# non_nan_uuids = \
#     p.map(add_non_nan_uuid,
import json

from smqtk.representation import DescriptorElementFactory
from smqtk.utils.bin_utils import logging, initialize_logging
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo

if not logging.getLogger().handlers:
    initialize_logging(logging.getLogger(), logging.DEBUG)
log = logging.getLogger(__name__)


log.info("Loading descriptor elements")
# Descriptor element type label used by the factory below.
# Context managers close the handles the original open(...).read() leaked.
with open("descriptor_type_name.txt") as _f:
    d_type_str = _f.read().strip()
# Factory config is comment-tolerant JSON; strip comments before parsing.
with open('descriptor_factory_config.json') as _f:
    df_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(df_config)

#
# Sample code for finding non-NaN descriptors in parallel
#
# def add_non_nan_uuid(uuid):
#     d = factory.new_descriptor(d_type_str, uuid)
#     if d.vector().sum() > 0:
#         return uuid
#     return None
#
# import multiprocessing
# p = multiprocessing.Pool()
# non_nan_uuids = \
#     p.map(add_non_nan_uuid,
#           (l.strip() for l in open('descriptor_uuids.txt')))
# Initialize logging.
# Conditional expression replaces the fragile ``debug and X or Y`` idiom
# (which silently breaks if the first branch value is ever falsy).
llevel = logging.DEBUG if debug else logging.INFO
if not logging.getLogger('smqtk').handlers:
    initialize_logging(logging.getLogger('smqtk'), llevel)
if not logging.getLogger('__main__').handlers:
    initialize_logging(logging.getLogger('__main__'), llevel)

l = logging.getLogger(__name__)

# Merge loaded config with default.
config_loaded = False
c = default_config()
if config_fp:
    if os.path.isfile(config_fp):
        # Config is comment-tolerant JSON; strip comments before parsing.
        with open(config_fp) as f:
            c.update(json.loads(jsmin(f.read())))
        config_loaded = True
    else:
        l.error("Config file path not valid")
        exit(100)
# Always write out the effective (merged) configuration.
output_config(out_config_fp, c, overwrite=True)

# Input checking.
if not config_loaded:
    l.error("No configuration provided")
    exit(101)

if not filelist_fp:
    l.error("No file-list file specified")
    exit(102)
def main():
    """Generate descriptor and (optionally) indexer models for a configured
    ingest, driven by CLI options (see option help strings below).
    """
    import optparse
    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured valued in " \
        "the smqtk_config module and from the system configuration JSON file " \
        "(etc/system_config.json) unless otherwise specified by options to " \
        "this script. Specific ingest used is determined by the ingest type " \
        "provided (-t/--type)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")
    group_required.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")

    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messaged to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    # More -v flags lower the logging threshold (INFO -> DEBUG).
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    # Listing mode: print available configuration labels and stop.
    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for l in DataSetConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for l in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for l in IndexerConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        exit(0)

    # Check given labels
    fail = False
    if dset_label and dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True
    if cd_label and cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True
    if fail:
        exit(1)
    del fail

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform serially
        # in order to allow multiple high-level feature computation jobs, else
        # we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content_description that might to
        # parallel processing might use multiprocessing.Pool instances, too.
        # Pools don't usually allow daemonic processes, so this custom
        # top-level pool allows worker processes to spawn pools themselves.
        fmap = descriptor.compute_descriptor_async(
            dset,
            parallel=parallel,
            pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
# Initialize logging.
# Conditional expression replaces the fragile ``debug and X or Y`` idiom
# (which silently breaks if the first branch value is ever falsy).
llevel = logging.DEBUG if debug else logging.INFO
if not logging.getLogger("smqtk").handlers:
    initialize_logging(logging.getLogger("smqtk"), llevel)
if not logging.getLogger("__main__").handlers:
    initialize_logging(logging.getLogger("__main__"), llevel)

l = logging.getLogger(__name__)

# Merge loaded config with default.
config_loaded = False
c = default_config()
if config_fp:
    if os.path.isfile(config_fp):
        # Config is comment-tolerant JSON; strip comments before parsing.
        with open(config_fp) as f:
            c.update(json.loads(jsmin(f.read())))
        config_loaded = True
    else:
        l.error("Config file path not valid")
        exit(100)
# Always write out the effective (merged) configuration.
output_config(out_config_fp, c, overwrite=True)

# Input checking.
if not config_loaded:
    l.error("No configuration provided")
    exit(101)

if not filelist_fp:
    l.error("No file-list file specified")
    exit(102)