Exemplo n.º 1
0
    def _configure(self):
        """Read plugin configuration and construct the SMQTK descriptor
        generator.

        Copies every available config entry into ``self.config_dict`` and,
        unless test mode is active, creates a descriptor element factory and
        a DescriptorGenerator instance from the JSON file named by the
        "config_file" config value.
        """
        # Extract config as a plain dictionary for later reference.
        self.config_dict = {}
        for item in self.available_config():
            self.config_dict[item] = self.config_value(item)

        # If we're in test mode, don't do anything that requires smqtk.
        if not apply_descriptor_test_mode:
            # Create descriptor factory (in-memory vector storage).
            self.factory = DescriptorElementFactory(DescriptorMemoryElement, {})

            # Local imports so smqtk is only required outside test mode.
            from smqtk.utils.jsmin import jsmin
            import json

            # Parse the (possibly comment-bearing) JSON config file.  Use a
            # context manager so the handle is closed deterministically --
            # the original left the file open.
            file_name = self.config_value("config_file")
            with open(file_name) as cfg_file:
                self.descr_config = json.loads(jsmin(cfg_file.read()))

            # Instantiate the configured DescriptorGenerator plugin.
            self.generator = from_plugin_config(self.descr_config,
                                                get_descriptor_generator_impls)

        self._base_configure()
Exemplo n.º 2
0
    def _configure(self):
        """Read plugin configuration and construct the SMQTK descriptor
        generator.

        Copies every available config entry into ``self.config_dict`` and,
        unless test mode is active, creates a descriptor element factory and
        a DescriptorGenerator instance from the JSON file named by the
        "config_file" config value.
        """
        # Extract config as a plain dictionary for later reference.
        self.config_dict = {}
        for item in self.available_config():
            self.config_dict[item] = self.config_value(item)

        # If we're in test mode, don't do anything that requires smqtk.
        if not apply_descriptor_test_mode:
            # Create descriptor factory (in-memory vector storage).
            self.factory = DescriptorElementFactory(DescriptorMemoryElement, {})

            # Local imports so smqtk is only required outside test mode.
            from smqtk.utils.jsmin import jsmin
            import json

            # Parse the (possibly comment-bearing) JSON config file.  Use a
            # context manager so the handle is closed deterministically --
            # the original left the file open.
            file_name = self.config_value("config_file")
            with open(file_name) as cfg_file:
                self.descr_config = json.loads(jsmin(cfg_file.read()))

            # Instantiate the configured DescriptorGenerator plugin.
            self.generator = from_plugin_config(self.descr_config,
                                                get_descriptor_generator_impls)

        self._base_configure()
Exemplo n.º 3
0
#
# Input parameters
#
# The following dictionaries are JSON configurations that are used to
# configure the various data structures and algorithms needed for the IQR demo
# application. Values here can be changed to suit your specific data and
# algorithm needs.
#
# See algorithm implementation doc-strings for more information on configuration
# parameters (see implementation class ``__init__`` method).
#
search_app_config_filepath = "/Users/purg/dev/smqtk/source/python/smqtk/web/" \
                             "search_app/config.IqrSearchApp.json"
# Parse the comment-bearing JSON config.  A context manager closes the file
# handle deterministically (the original ``open(...).read()`` left it open).
with open(search_app_config_filepath) as _config_file:
    search_app_config = json.loads(jsmin.jsmin(_config_file.read()))

# base actions on a specific IQR tab configuration (choose index here)
search_app_iqr_config = search_app_config["iqr_tabs"][0]

# Shell glob for where input data is located.
input_image_file_glob = "/Users/purg/dev/smqtk/source/data/FileDataSets/" \
                        "example_image/images/*/*"

# Configure DataSet implementation and parameters
data_set_config = search_app_iqr_config['data_set']

# Configure DescriptorGenerator algorithm implementation, parameters and
# persistent model component locations (if implementation has any).
descriptor_generator_config = search_app_iqr_config['descr_generator']
Exemplo n.º 4
0
def load_algo(m=smqtk.algorithms.nn_index.lsh):
    """Load an LSH nearest-neighbor index from ``itq_config.json``.

    :param m: Module providing the ``LSHNearestNeighborIndex`` class.
    :return: Configured index instance.
    """
    with open("itq_config.json") as config_file:
        config = json.loads(jsmin.jsmin(config_file.read()))
    index = m.LSHNearestNeighborIndex.from_config(config)
    return index
Exemplo n.º 5
0
from smqtk.utils.bin_utils import initialize_logging
from smqtk.utils.bit_utils import bit_vector_to_int
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo

# Input files produced by earlier pipeline steps.
UUIDS_FILEPATH = "descriptor_uuids.all.txt"
ITQ_ROTATION = "itq.256.rotation.npy"
ITQ_MEAN_VEC = "itq.256.mean_vec.npy"

# Extract the SHA1 portion of "<prefix>.<sha1>.vector.npy" file names.
# Raw string avoids the invalid "\w" escape-sequence warning.
fn_sha1_re = re.compile(r"\w+\.(\w+)\.vector\.npy")

# Descriptor element type label persisted by a previous step.  Context
# managers close the files deterministically (previously left open).
with open('descriptor_type_name.txt') as _f:
    element_type_str = _f.read().strip()

with open("descriptor_factory_config.json") as _f:
    factory_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(factory_config)


#
# Multiprocessing of ITQ small-code generation
#
def make_element(uuid):
    """Create a DescriptorElement of the module-configured type for *uuid*."""
    element = factory.new_descriptor(element_type_str, uuid)
    return element


def make_elements_from_uuids(uuids):
    """Lazily yield a DescriptorElement for every UUID in ``uuids``."""
    for u in uuids:
        yield make_element(u)

Exemplo n.º 6
0
def main():
    """Build IQR demo application models from a JSON configuration.

    Parses CLI arguments, loads the (comment-bearing) JSON config for the
    selected IQR tab, instantiates the configured data-set and algorithm
    plugins, ingests the input files, and builds the descriptor,
    nearest-neighbor, and relevancy index models.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Parse config via a context manager so the file handle is closed
    # deterministically (the original left it open).
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR demo
    # application. Values here can be changed to suit your specific data and
    # algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on configuration
    # parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config[
        'descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls(
    )["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Non-file arguments are treated as shell globs.
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    try:
        nn_index.build_index(six.itervalues(data2descriptor))
    except RuntimeError:
        # Already built model, so skipping this step
        pass

    rel_index.build_index(six.itervalues(data2descriptor))
Exemplo n.º 7
0
def main():
    """Run a configured SMQTK web application from the command line.

    Flow: parse CLI options, set up logging, optionally list available
    applications, validate the requested application and config file, then
    construct the Flask app from config and serve it.
    """
    parser = bin_utils.SMQTKOptParser()
    setup_cli(parser)
    opts, args = parser.parse_args()

    debug_smqtk = opts.debug_smqtk
    debug_server = opts.debug_server

    # Debug flags lower the effective log level by one/two steps
    # (INFO-10 == DEBUG; WARN-20 == DEBUG) via boolean arithmetic.
    bin_utils.initialize_logging(logging.getLogger("smqtk"),
                                 logging.INFO - (10*debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 logging.WARN - (20*debug_server))
    log = logging.getLogger("smqtk.main")

    # Mapping of application label -> application class.
    web_applications = smqtk.web.get_web_applications()

    # --list: print available application labels and exit successfully.
    if opts.list:
        log.info("")
        log.info("Available applications:")
        log.info("")
        for l in web_applications:
            log.info("\t" + l)
        log.info("")
        exit(0)

    application_name = opts.application

    if application_name is None:
        log.error("No application name given!")
        exit(1)
    elif application_name not in web_applications:
        log.error("Invalid application label '%s'", application_name)
        exit(1)

    app_class = web_applications[application_name]

    # Output config and exit if requested
    bin_utils.output_config(opts.output_config, app_class.get_default_config(),
                            log, opts.overwrite)

    if not opts.config:
        log.error("No configuration provided")
        exit(1)
    elif not os.path.isfile(opts.config):
        log.error("Configuration file path not valid.")
        exit(1)

    # Config may contain JS-style comments; jsmin strips them before parsing.
    with open(opts.config, 'r') as f:
        config = json.loads(jsmin(f.read()))

    host = opts.host
    # Falsy port (None/"") stays falsy; otherwise coerce to int.
    port = opts.port and int(opts.port)
    use_reloader = opts.reload
    use_threading = opts.threaded
    use_basic_auth = opts.use_basic_auth

    # noinspection PyUnresolvedReferences
    app = app_class.from_config(config)
    if use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = debug_server

    app.run(host=host, port=port, debug=debug_server, use_reloader=use_reloader,
            threaded=use_threading)
Exemplo n.º 8
0
 def _minify(self, js):
     """Return the jsmin-minified form of the given JavaScript source."""
     minified = jsmin.jsmin(js)
     return minified
Exemplo n.º 9
0
 def testUnicode(self):
     """A // line comment following a non-ASCII character must be stripped."""
     source = u'\u4000 //foo'
     result = jsmin.jsmin(source)
     self.assertEqual(result, u'\u4000')
Exemplo n.º 10
0
 def _minify(self, js):
     """Minify JavaScript source *js* via jsmin and return the result."""
     result = jsmin.jsmin(js)
     return result
Exemplo n.º 11
0
 def assertMinified(self, js_input, expected):
     """Fail unless jsmin(js_input) equals *expected*, showing both values."""
     actual = jsmin.jsmin(js_input)
     assert actual == expected, "%r != %r" % (actual, expected)
Exemplo n.º 12
0
def main():
    """Build IQR demo application models from a JSON configuration.

    Parses CLI arguments, loads the (comment-bearing) JSON config for the
    selected IQR tab, instantiates the configured data-set and algorithm
    plugins, ingests the input files, and builds the descriptor,
    nearest-neighbor, and relevancy index models.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Parse config via a context manager so the file handle is closed
    # deterministically (the original left it open).
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR demo
    # application. Values here can be changed to suit your specific data and
    # algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on configuration
    # parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config['descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls)

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls)

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls)

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Non-file arguments are treated as shell globs.
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    # ``.values()`` works on both Python 2 and 3 (``itervalues()`` is
    # Python-2-only and raises AttributeError under Python 3).
    try:
        nn_index.build_index(data2descriptor.values())
    except RuntimeError:
        # Already built model, so skipping this step
        pass

    rel_index.build_index(data2descriptor.values())
Exemplo n.º 13
0
from smqtk.representation import DescriptorElementFactory
from smqtk.utils.bin_utils import logging, initialize_logging
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo


# Configure root-logger handlers exactly once per process.
if not logging.getLogger().handlers:
    initialize_logging(logging.getLogger(), logging.DEBUG)
log = logging.getLogger(__name__)


log.info("Loading descriptor elements")
# Descriptor element type label persisted by a previous pipeline step.
# Context managers close the files deterministically (previously left open).
with open("descriptor_type_name.txt") as _f:
    d_type_str = _f.read().strip()
with open('descriptor_factory_config.json') as _f:
    df_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(df_config)

#
# Sample code for finding non-NaN descriptors in parallel
#
# def add_non_nan_uuid(uuid):
#     d = factory.new_descriptor(d_type_str, uuid)
#     if d.vector().sum() > 0:
#         return uuid
#     return None
#
# import multiprocessing
# p = multiprocessing.Pool()
# non_nan_uuids = \
#     p.map(add_non_nan_uuid,
Exemplo n.º 14
0
 def assertMinified(self, js_input, expected):
     """Fail unless jsmin(js_input) equals *expected*, showing both values."""
     actual = jsmin.jsmin(js_input)
     assert actual == expected, "%r != %r" % (actual, expected)
Exemplo n.º 15
0
import json

from smqtk.representation import DescriptorElementFactory
from smqtk.utils.bin_utils import logging, initialize_logging
from smqtk.utils.jsmin import jsmin

from load_algo import load_algo

# Configure root-logger handlers exactly once per process.
if not logging.getLogger().handlers:
    initialize_logging(logging.getLogger(), logging.DEBUG)
log = logging.getLogger(__name__)

log.info("Loading descriptor elements")
# Descriptor element type label persisted by a previous pipeline step.
# Context managers close the files deterministically (previously left open).
with open("descriptor_type_name.txt") as _f:
    d_type_str = _f.read().strip()
with open('descriptor_factory_config.json') as _f:
    df_config = json.loads(jsmin(_f.read()))
factory = DescriptorElementFactory.from_config(df_config)

#
# Sample code for finding non-NaN descriptors in parallel
#
# def add_non_nan_uuid(uuid):
#     d = factory.new_descriptor(d_type_str, uuid)
#     if d.vector().sum() > 0:
#         return uuid
#     return None
#
# import multiprocessing
# p = multiprocessing.Pool()
# non_nan_uuids = \
#     p.map(add_non_nan_uuid,
#           (l.strip() for l in open('descriptor_uuids.txt')))
Exemplo n.º 16
0
 def testUnicode(self):
     """A // line comment following a non-ASCII character must be stripped."""
     source = u'\u4000 //foo'
     result = jsmin.jsmin(source)
     self.assertEqual(result, u'\u4000')
Exemplo n.º 17
0
    # Initialize logging
    # NOTE(review): fragment of a larger function -- ``debug``, ``config_fp``,
    # ``out_config_fp`` and ``filelist_fp`` are parameters/locals defined
    # above this view; confirm against the full definition.
    # Pre-ternary idiom: DEBUG when ``debug`` is truthy, else INFO.
    llevel = debug and logging.DEBUG or logging.INFO
    if not logging.getLogger('smqtk').handlers:
        initialize_logging(logging.getLogger('smqtk'), llevel)
    if not logging.getLogger('__main__').handlers:
        initialize_logging(logging.getLogger('__main__'), llevel)

    l = logging.getLogger(__name__)

    # Merge loaded config with default
    config_loaded = False
    c = default_config()
    if config_fp:
        if os.path.isfile(config_fp):
            # jsmin strips JS-style comments so the JSON can be annotated.
            with open(config_fp) as f:
                c.update(json.loads(jsmin(f.read())))
            config_loaded = True
        else:
            l.error("Config file path not valid")
            exit(100)

    # Always write the merged config back out for reference/reuse.
    output_config(out_config_fp, c, overwrite=True)

    # Input checking
    if not config_loaded:
        l.error("No configuration provided")
        exit(101)

    if not filelist_fp:
        l.error("No file-list file specified")
        exit(102)
Exemplo n.º 18
0
def main():
    """Generate descriptor and (optionally) indexer models for an ingest.

    Flow: parse optparse options, set up logging, optionally apply a custom
    system-config JSON, optionally list available configurations, validate
    the given labels, then generate the descriptor model and, if an indexer
    label was given, compute descriptors and build the indexer model.
    """
    import optparse
    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured valued in " \
        "the smqtk_config module and from the system configuration JSON file " \
        "(etc/system_config.json) unless otherwise specified by options to " \
        "this script. Specific ingest used is determined by the ingest type " \
        "provided (-t/--type)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")
    group_required.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")

    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messaged to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    # --verbose lowers the effective level one step (INFO-10 == DEBUG).
    bin_utils.initialize_logging(logging.getLogger(),
                                logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given
    if opts.sys_json:
        # jsmin strips JS-style comments so the JSON may be annotated.
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    # --list: print available configuration labels and exit successfully.
    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for l in DataSetConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for l in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for l in IndexerConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        exit(0)

    # Check given labels
    fail = False
    if dset_label and dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True
    if cd_label and cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True
    if fail:
        exit(1)
    del fail

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform serially
        # in order to allow multiple high-level feature computation jobs, else
        # we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content_description that might to
        # parallel processing might use multiprocessing.Pool instances, too.
        # Pools don't usually allow daemonic processes, so this custom top-level
        # pool allows worker processes to spawn pools themselves.
        fmap = descriptor.compute_descriptor_async(
            dset,
            parallel=parallel,
            pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
Exemplo n.º 19
0
    # Initialize logging
    # NOTE(review): fragment of a larger function -- ``debug``, ``config_fp``,
    # ``out_config_fp`` and ``filelist_fp`` are parameters/locals defined
    # above this view; confirm against the full definition.
    # Pre-ternary idiom: DEBUG when ``debug`` is truthy, else INFO.
    llevel = debug and logging.DEBUG or logging.INFO
    if not logging.getLogger("smqtk").handlers:
        initialize_logging(logging.getLogger("smqtk"), llevel)
    if not logging.getLogger("__main__").handlers:
        initialize_logging(logging.getLogger("__main__"), llevel)

    l = logging.getLogger(__name__)

    # Merge loaded config with default
    config_loaded = False
    c = default_config()
    if config_fp:
        if os.path.isfile(config_fp):
            # jsmin strips JS-style comments so the JSON can be annotated.
            with open(config_fp) as f:
                c.update(json.loads(jsmin(f.read())))
            config_loaded = True
        else:
            l.error("Config file path not valid")
            exit(100)

    # Always write the merged config back out for reference/reuse.
    output_config(out_config_fp, c, overwrite=True)

    # Input checking
    if not config_loaded:
        l.error("No configuration provided")
        exit(101)

    if not filelist_fp:
        l.error("No file-list file specified")
        exit(102)