Пример #1
0
def main():
    """
    Command-line entry point: load the configuration and run
    ``classify_files`` with the label and file globs from the command line.

    Logging for this module's logger and for the ``smqtk`` logger is set to
    DEBUG when the ``debug`` argument is truthy, otherwise INFO.

    Exits with status 101 when no existing configuration file path was
    given.  The trailing ``100`` handed to ``output_config`` is forwarded
    unchanged; its meaning is defined by that helper.
    """
    cli_args = get_cli_parser().parse_args()
    log = logging.getLogger(__name__)

    cfg_path = cli_args.config
    do_generate = cli_args.generate_config
    allow_overwrite = cli_args.overwrite
    target_label = cli_args.label
    globs = cli_args.file_globs

    # Both loggers share one verbosity level.
    level = logging.DEBUG if cli_args.debug else logging.INFO
    for logger_name in (__name__, 'smqtk'):
        initialize_logging(logging.getLogger(logger_name), level)
    log.debug("Showing debug messages.")

    # Overlay the user's JSON file on top of the defaults, when one exists.
    config = get_default_config()
    have_config = bool(cfg_path) and os.path.isfile(cfg_path)
    if have_config:
        with open(cfg_path) as config_file:
            log.info("Loading configuration: %s", cfg_path)
            config.update(json.load(config_file))
    output_config(do_generate, config, log, allow_overwrite, 100)

    if not have_config:
        log.error("No configuration provided")
        exit(101)

    classify_files(config, target_label, globs)
Пример #2
0
def main():
    """
    Command-line entry point: start a ``ProxyManager`` server.

    The default configuration is overlaid with the JSON file named by the
    ``config`` argument (when given).  The manager is then created with an
    empty host string, the configured ``port`` and ``authkey``, and its
    server is run with ``serve_forever()``.

    Exits with status 1 when a configuration path is given but is not an
    existing file.
    """
    args = cli_parser().parse_args()

    llevel = logging.DEBUG if args.verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    # Merge loaded config with default
    config = default_config()
    if args.config:
        # Test the path exactly once.  Re-testing it in an ``elif`` meant a
        # path that changed between the two checks could skip both branches
        # and silently run on the defaults.
        if not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))

    bin_utils.output_config(args.output_config, config, log, True)

    # Default config options for this util are valid for running, so there is
    # no "has config loaded" check here.

    port = int(config['port'])
    authkey = str(config['authkey'])

    mgr = ProxyManager(('', port), authkey)
    mgr.get_server().serve_forever()
Пример #3
0
def main():
    """
    Command-line entry point: load the configuration and call
    ``train_classifier_iqr`` with it and the IQR state file path.

    The root logger is set to DEBUG when the ``debug`` argument is truthy,
    otherwise INFO.

    Exit statuses set here: 101 when no existing configuration file path was
    given, 102 when the IQR state path is not an existing file.  The trailing
    ``100`` handed to ``output_config`` is forwarded unchanged; its meaning
    is defined by that helper.
    """
    cli_args = get_cli_parser().parse_args()
    log = logging.getLogger(__name__)

    cfg_path = cli_args.config
    do_generate = cli_args.generate_config
    allow_overwrite = cli_args.overwrite
    state_path = cli_args.iqr_state

    level = logging.DEBUG if cli_args.debug else logging.INFO
    initialize_logging(logging.getLogger(), level)
    log.debug("Showing debug messages.")

    # Overlay the user's JSON file on top of the defaults, when one exists.
    config = get_default_config()
    have_config = bool(cfg_path) and os.path.isfile(cfg_path)
    if have_config:
        with open(cfg_path) as config_file:
            log.info("Loading configuration: %s", cfg_path)
            config.update(json.load(config_file))
    output_config(do_generate, config, log, allow_overwrite, 100)

    if not have_config:
        log.error("No configuration provided")
        exit(101)

    if not os.path.isfile(state_path):
        log.error("IQR Session info JSON filepath was invalid")
        exit(102)

    train_classifier_iqr(config, state_path)
Пример #4
0
def main():
    """
    Command-line entry point: load the configuration and run
    ``classify_files`` with the label and file globs from the command line.

    Logging for the ``__main__`` and ``smqtk`` loggers is set to DEBUG when
    the ``verbose`` argument is truthy, otherwise INFO.

    Exits with status 101 when no existing configuration file path was
    given.  The trailing ``100`` handed to ``output_config`` is forwarded
    unchanged; its meaning is defined by that helper.
    """
    cli_args = get_cli_parser().parse_args()
    log = logging.getLogger(__name__)

    cfg_path = cli_args.config
    do_generate = cli_args.generate_config
    allow_overwrite = cli_args.overwrite
    target_label = cli_args.label
    globs = cli_args.file_globs

    # Both loggers share one verbosity level.
    level = logging.DEBUG if cli_args.verbose else logging.INFO
    for logger_name in ('__main__', 'smqtk'):
        initialize_logging(logging.getLogger(logger_name), level)
    log.debug("Showing debug messages.")

    # Overlay the user's JSON file on top of the defaults, when one exists.
    config = get_default_config()
    have_config = bool(cfg_path) and os.path.isfile(cfg_path)
    if have_config:
        with open(cfg_path) as config_file:
            log.info("Loading configuration: %s", cfg_path)
            config.update(json.load(config_file))
    output_config(do_generate, config, log, allow_overwrite, 100)

    if not have_config:
        log.error("No configuration provided")
        exit(101)

    classify_files(config, target_label, globs)
Пример #5
0
def main():
    """
    Command-line entry point: start a ``ProxyManager`` server.

    The default configuration is overlaid with the JSON file named by the
    ``config`` argument (when given).  The manager is then created with an
    empty host string, the configured ``port`` and ``authkey``, and its
    server is run with ``serve_forever()``.

    Exits with status 1 when a configuration path is given but is not an
    existing file.
    """
    args = cli_parser().parse_args()

    llevel = logging.DEBUG if args.verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    # Merge loaded config with default
    config = default_config()
    if args.config:
        # Test the path exactly once.  Re-testing it in an ``elif`` meant a
        # path that changed between the two checks could skip both branches
        # and silently run on the defaults.
        if not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))

    bin_utils.output_config(args.output_config, config, log, True)

    # Default config options for this util are valid for running, so there is
    # no "has config loaded" check here.

    port = int(config['port'])
    authkey = str(config['authkey'])

    mgr = ProxyManager(('', port), authkey)
    mgr.get_server().serve_forever()
Пример #6
0
def main():
    """
    Command-line entry point: start a ``ProxyManager`` server configured from
    a file read through ``SafeConfigCommentParser``.

    The file named by ``-c/--config`` must exist, must not be a directory and
    must contain a ``server`` section with ``port`` and ``authkey`` options.

    :raises AssertionError: If no configuration file is given, the path does
        not exist or is a directory, the file could not be parsed, or the
        ``server`` section / its required options are missing.
    """
    def require(condition, message):
        # Explicit stand-in for ``assert``: the check must still run when
        # Python is started with ``-O``, which strips ``assert`` statements.
        # ``AssertionError`` is kept so the raised type is unchanged.
        if not condition:
            raise AssertionError(message)

    parser = bin_utils.SMQTKOptParser()
    parser.add_option("-c", "--config", type=str, help="Path to the configuration file.")
    parser.add_option("-v", "--verbose", action="store_true", default=False, help="Add debugging log messages.")
    opts, _ = parser.parse_args()

    # ``verbose`` is a boolean: True lowers the level from INFO to DEBUG.
    bin_utils.initialize_logging(logging.getLogger(), logging.INFO - (10 * opts.verbose))

    config_file = opts.config
    require(config_file is not None, "Not given a configuration file for the server!")
    require(osp.exists(config_file), "Given config file path does not exist.")
    require(not osp.isdir(config_file), "Given config file is a directory!")

    config = SafeConfigCommentParser()
    parsed = config.read(config_file)
    require(parsed, "Configuration file not parsed!")
    section = "server"
    require(config.has_section(section), "No server section found!")
    require(config.has_option(section, "port"), "No port option in config!")
    require(config.has_option(section, "authkey"), "No authkey option in config!")
    port = config.getint(section, "port")
    authkey = config.get(section, "authkey")

    mgr = ProxyManager(("", port), authkey)
    mgr.get_server().serve_forever()
Пример #7
0
def main():
    """
    Command-line entry point: load the configuration and call
    ``train_classifier_iqr`` with it and the IQR state file path.

    The root logger is set to DEBUG when the ``debug`` argument is truthy,
    otherwise INFO.

    Exit statuses set here: 101 when no existing configuration file path was
    given, 102 when the IQR state path is not an existing file.  The trailing
    ``100`` handed to ``output_config`` is forwarded unchanged; its meaning
    is defined by that helper.
    """
    cli_args = get_cli_parser().parse_args()
    log = logging.getLogger(__name__)

    cfg_path = cli_args.config
    do_generate = cli_args.generate_config
    allow_overwrite = cli_args.overwrite
    state_path = cli_args.iqr_state

    level = logging.DEBUG if cli_args.debug else logging.INFO
    initialize_logging(logging.getLogger(), level)
    log.debug("Showing debug messages.")

    # Overlay the user's JSON file on top of the defaults, when one exists.
    config = get_default_config()
    have_config = bool(cfg_path) and os.path.isfile(cfg_path)
    if have_config:
        with open(cfg_path) as config_file:
            log.info("Loading configuration: %s", cfg_path)
            config.update(json.load(config_file))
    output_config(do_generate, config, log, allow_overwrite, 100)

    if not have_config:
        log.error("No configuration provided")
        exit(101)

    if not os.path.isfile(state_path):
        log.error("IQR Session info JSON filepath was invalid")
        exit(102)

    train_classifier_iqr(config, state_path)
Пример #8
0
def main():
    """
    Command-line entry point: add local files to a configured data set.

    Positional arguments are file paths or shell-style glob strings.  Each
    argument that is an existing file is added directly; anything else is
    expanded with ``glob.glob`` and every match is added.

    With ``-l/--list-ingests`` the labels of configured data sets whose type
    is ``DataFileSet`` are logged and the program exits with status 0.
    Without ``-s/--set-label`` the program exits with status 1.
    """
    usage = "%prog [options] GLOB [ GLOB [ ... ] ]"
    description = "Create a file-based ingest from a set of local file paths " \
                  "or shell-style glob strings."

    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-s', '--set-label',
                      help="Configured ingest to 'ingest' into.")
    parser.add_option('-l', '--list-ingests', action='store_true',
                      default=False,
                      help="List available ingests we can ingest new data "
                           "into. See the system_config.json file in the etc "
                           "directory for more details.")
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Add debug messaged to output logging.')
    opts, args = parser.parse_args()

    # ``verbose`` is a boolean: True lowers the level from INFO to DEBUG.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    if opts.list_ingests:
        # Find labels for configured data sets that are of the FileSet type.
        # ``items()`` is used instead of the Python-2-only ``iteritems()``.
        data_sets = smqtk_config.SYSTEM_CONFIG['DataSets']
        file_ds_labels = [
            label
            for label, dsc in data_sets.items()
            if dsc['type'] == "DataFileSet"
        ]

        log.info("")
        log.info("Available File-based datasets:")
        for k in sorted(file_ds_labels):
            log.info("\t%s", k)
        log.info("")
        exit(0)

    if opts.set_label is None:
        log.info("")
        log.info("ERROR: Please provide data set configuration label.")
        log.info("")
        exit(1)

    fds = DataSetConfiguration.new_inst(opts.set_label)
    # Lazy %-style arguments: the message is only built when DEBUG is on.
    log.debug("Script arguments:\n%s", args)

    def ingest_file(fp):
        fds.add_data(DataFileElement(fp))

    for path in args:
        path = osp.expanduser(path)
        if osp.isfile(path):
            ingest_file(path)
        else:
            log.debug("Expanding glob: %s", path)
            for match in glob.glob(path):
                ingest_file(match)
def main():
    """
    Feed ``(type, uuid)`` pairs parsed from a pickled file-name list to a pool
    of ``proc_transfer`` worker processes.

    One worker is started per CPU, each receiving the shared input queue.
    Every file name in ``descriptor_file_names.5.3mil.pickle`` is matched
    against ``fname_re`` and its first two groups are queued as a tuple.
    Afterwards one ``None`` per worker is queued as a terminal packet.

    Any failure (including ``KeyboardInterrupt``) terminates the workers and
    is then re-raised so the caller sees it; workers are joined in every
    case.

    :raises ValueError: If a file name does not match ``fname_re``.
    """
    bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
    log = logging.getLogger(__name__)

    # For each file in descriptor vector file tree, load from file
    # [type, uuid, vector] and insert into PSQL element.

    log.info("Setting up parallel environment")
    in_queue = multiprocessing.Queue()
    workers = []
    for _ in range(multiprocessing.cpu_count()):
        p = multiprocessing.Process(
            target=proc_transfer,
            args=(in_queue,)
        )
        workers.append(p)
        p.start()

    try:
        log.info("Loading filename list")
        # NOTE(review): unpickling runs arbitrary code; only load a trusted
        # file here.
        with open("descriptor_file_names.5.3mil.pickle") as f:
            fname_list = cPickle.load(f)

        log.info("Running through filename list")
        for n in fname_list:
            m = fname_re.match(n)
            # Explicit check instead of ``assert``, which ``python -O``
            # strips.
            if m is None:
                raise ValueError("Unexpected descriptor file name: %r" % (n,))

            type_str = m.group(1)
            uuid_str = m.group(2)
            in_queue.put((type_str, uuid_str))

        log.info("Sending worker terminal packets")
        for _ in workers:
            in_queue.put(None)

    except BaseException:
        # Catch everything (Ctrl-C included) so no worker is left running,
        # then re-raise: swallowing the error hid the failure from the
        # caller.
        log.info("Terminating workers")
        for w in workers:
            w.terminate()
        raise

    finally:
        log.info("Waiting for workers to complete")
        for w in workers:
            w.join()
        log.info("Workers joined")
def main():
    """
    Feed ``(type, uuid)`` pairs parsed from a pickled file-name list to a pool
    of ``proc_transfer`` worker processes.

    One worker is started per CPU, each receiving the shared input queue.
    Every file name in ``descriptor_file_names.5.3mil.pickle`` is matched
    against ``fname_re`` and its first two groups are queued as a tuple.
    Afterwards one ``None`` per worker is queued as a terminal packet.

    Any failure (including ``KeyboardInterrupt``) terminates the workers and
    is then re-raised so the caller sees it; workers are joined in every
    case.

    :raises ValueError: If a file name does not match ``fname_re``.
    """
    bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
    log = logging.getLogger(__name__)

    # For each file in descriptor vector file tree, load from file
    # [type, uuid, vector] and insert into PSQL element.

    log.info("Setting up parallel environment")
    in_queue = multiprocessing.Queue()
    workers = []
    for _ in range(multiprocessing.cpu_count()):
        p = multiprocessing.Process(
            target=proc_transfer,
            args=(in_queue,)
        )
        workers.append(p)
        p.start()

    try:
        log.info("Loading filename list")
        # NOTE(review): unpickling runs arbitrary code; only load a trusted
        # file here.
        with open("descriptor_file_names.5.3mil.pickle") as f:
            fname_list = cPickle.load(f)

        log.info("Running through filename list")
        for n in fname_list:
            m = fname_re.match(n)
            # Explicit check instead of ``assert``, which ``python -O``
            # strips.
            if m is None:
                raise ValueError("Unexpected descriptor file name: %r" % (n,))

            type_str = m.group(1)
            uuid_str = m.group(2)
            in_queue.put((type_str, uuid_str))

        log.info("Sending worker terminal packets")
        for _ in workers:
            in_queue.put(None)

    except BaseException:
        # Catch everything (Ctrl-C included) so no worker is left running,
        # then re-raise: swallowing the error hid the failure from the
        # caller.
        log.info("Terminating workers")
        for w in workers:
            w.terminate()
        raise

    finally:
        log.info("Waiting for workers to complete")
        for w in workers:
            w.join()
        log.info("Workers joined")
Пример #11
0
def main():
    """
    Command-line entry point: call ``interval_scan`` with the interval, base
    directory and expiry taken from the parsed arguments, using
    ``remove_file_action`` as the final argument.

    The ``smqtk`` logger is set to DEBUG when ``verbose`` is truthy,
    otherwise INFO.
    """
    args = cli_parser().parse_args()

    level = logging.DEBUG if args.verbose else logging.INFO
    initialize_logging(logging.getLogger("smqtk"), level)

    scan_root = args.base_dir
    period_seconds = args.interval
    max_age_seconds = args.expiry

    interval_scan(period_seconds, scan_root, max_age_seconds,
                  remove_file_action)
Пример #12
0
def main():
    """
    Command-line entry point: call ``interval_scan`` with the interval, base
    directory and expiry taken from the parsed arguments, using
    ``remove_file_action`` as the final argument.

    The ``smqtk`` logger is set to DEBUG when ``verbose`` is truthy,
    otherwise INFO.
    """
    args = cli_parser().parse_args()

    level = logging.DEBUG if args.verbose else logging.INFO
    initialize_logging(logging.getLogger("smqtk"), level)

    scan_root = args.base_dir
    period_seconds = args.interval
    max_age_seconds = args.expiry

    interval_scan(period_seconds, scan_root, max_age_seconds,
                  remove_file_action)
Пример #13
0
def main():
    """
    Command-line entry point: add local files to the data set described by
    the ``data_set`` section of the configuration.

    Each entry of ``args.input_files`` that is an existing file is wrapped in
    a ``DataFileElement``; anything else is expanded with ``glob.glob`` and
    every match is wrapped.  All elements are passed to the data set in one
    ``add_data`` call.

    Exits with status 1 when the configuration path is not an existing file
    or when no configuration was given at all.
    """
    args = cli_parser().parse_args()

    # ``verbose`` is a boolean: True lowers the level from INFO to DEBUG.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10 * args.verbose))
    log = logging.getLogger("main")

    # Merge loaded config with default
    config_loaded = False
    config = default_config()
    if args.config:
        # Test the path exactly once.  Re-testing it in an ``elif`` meant a
        # path that changed between the two checks could skip both branches.
        if not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))
        config_loaded = True

    # output configuration dictionary when asked for.
    bin_utils.output_config(args.output_config, config, log, True)

    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    # Lazy %-style arguments: the message is only built when DEBUG is on.
    log.debug("Script arguments:\n%s", args)

    def iter_input_elements():
        for path in args.input_files:
            path = osp.expanduser(path)
            if osp.isfile(path):
                yield DataFileElement(path)
            else:
                log.debug("Expanding glob: %s", path)
                for match in glob.glob(path):
                    yield DataFileElement(match)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    ds = plugin.from_plugin_config(config['data_set'], get_data_set_impls())
    ds.add_data(*iter_input_elements())
Пример #14
0
def main():
    """
    Command-line entry point: add local files to the data set described by
    the ``data_set`` section of the configuration.

    Each entry of ``args.input_files`` that is an existing file is wrapped in
    a ``DataFileElement``; anything else is expanded with ``glob.glob`` and
    every match is wrapped.  All elements are passed to the data set in one
    ``add_data`` call.

    Exits with status 1 when the configuration path is not an existing file
    or when no configuration was given at all.
    """
    args = cli_parser().parse_args()

    # ``verbose`` is a boolean: True lowers the level from INFO to DEBUG.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10 * args.verbose))
    log = logging.getLogger("main")

    # Merge loaded config with default
    config_loaded = False
    config = default_config()
    if args.config:
        # Test the path exactly once.  Re-testing it in an ``elif`` meant a
        # path that changed between the two checks could skip both branches.
        if not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))
        config_loaded = True

    # output configuration dictionary when asked for.
    bin_utils.output_config(args.output_config, config, log, True)

    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    # Lazy %-style arguments: the message is only built when DEBUG is on.
    log.debug("Script arguments:\n%s", args)

    def iter_input_elements():
        for path in args.input_files:
            path = osp.expanduser(path)
            if osp.isfile(path):
                yield DataFileElement(path)
            else:
                log.debug("Expanding glob: %s", path)
                for match in glob.glob(path):
                    yield DataFileElement(match)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    ds = plugin.from_plugin_config(config['data_set'], get_data_set_impls())
    ds.add_data(*iter_input_elements())
Пример #15
0
def main():
    """
    Command-line entry point: call ``interval_scan`` with the interval, base
    directory and expiry read from options declared on an
    ``SMQTKOptParser``, using ``remove_file_action`` as the final argument.

    The ``smqtk`` logger is set to DEBUG when ``-v/--verbose`` is given,
    otherwise INFO.
    """
    from smqtk.utils.bin_utils import initialize_logging, SMQTKOptParser

    option_parser = SMQTKOptParser()
    option_parser.add_option(
        "-d", "--base-dir",
        help="Starting directory for scan."
    )
    option_parser.add_option(
        "-i", "--interval", type=int,
        help="Number of seconds between each scan (integer)."
    )
    option_parser.add_option(
        "-e", "--expiry", type=int,
        help='Number of seconds until a file has "expired" (integer).'
    )
    option_parser.add_option(
        "-v", "--verbose", action="store_true", default=False,
        help="Display more messages (debugging)."
    )
    opts, _ = option_parser.parse_args()

    level = logging.DEBUG if opts.verbose else logging.INFO
    initialize_logging(logging.getLogger("smqtk"), level)

    scan_root = opts.base_dir
    period_seconds = opts.interval
    max_age_seconds = opts.expiry

    interval_scan(period_seconds, scan_root, max_age_seconds,
                  remove_file_action)
Пример #16
0
def main():
    """
    Command-line entry point: add local files to the data set described by
    the ``data_set`` section of a JSON configuration file.

    Positional arguments are file paths or shell-style glob strings.  Each
    argument that is an existing file is added directly; anything else is
    expanded with ``glob.glob`` and every match is added.

    Exits with status 1 when no ``-c/--config`` path was given.
    """
    usage = "%prog [options] GLOB [ GLOB [ ... ] ]"
    description = "Add a set of local system files to a data set via " \
                  "explicit paths or shell-style glob strings."

    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-c', '--config',
                      help="Path to the JSON configuration file")
    parser.add_option('--output-config',
                      help="Optional path to output a default configuration "
                           "file to. This output file should be modified and "
                           "used for this executable.")
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Add debug messaged to output logging.')
    opts, args = parser.parse_args()

    # ``verbose`` is a boolean: True lowers the level from INFO to DEBUG.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    # output configuration dictionary when asked for.
    bin_utils.output_config(opts.output_config, default_config(), log)

    # Without this guard a missing ``--config`` reached ``open(None)`` and
    # failed with an opaque TypeError.
    if not opts.config:
        log.error("No configuration provided")
        exit(1)

    with open(opts.config, 'r') as f:
        config = json.load(f)

    #: :type: smqtk.representation.DataSet
    ds = plugin.from_plugin_config(config['data_set'], get_data_set_impls)
    # Lazy %-style arguments: the message is only built when DEBUG is on.
    log.debug("Script arguments:\n%s", args)

    def ingest_file(fp):
        ds.add_data(DataFileElement(fp))

    for path in args:
        path = osp.expanduser(path)
        if osp.isfile(path):
            ingest_file(path)
        else:
            log.debug("Expanding glob: %s", path)
            for match in glob.glob(path):
                ingest_file(match)
Пример #17
0
def main():
    """
    Command-line entry point: check the images listed in a file and print
    ``<filepath>,<uuid>`` for each selected one.

    Every stripped line of the file list is treated as an image path.  Paths
    that do not exist are warned about and skipped.  The remaining elements
    are tested with ``is_valid_element(..., check_image=True)``; valid ones
    are printed, or the invalid ones instead when ``args.invert`` is set.

    Exits with status 1 (after printing help) when run with no arguments, and
    with status 103 when the file list path is missing or does not exist.
    """
    parser = get_cli_parser()

    # Print help and exit if no arguments were passed
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    llevel = logging.INFO if not args.verbose else logging.DEBUG
    initialize_logging(logging.getLogger('smqtk'), llevel)
    initialize_logging(logging.getLogger('__main__'), llevel)

    log = logging.getLogger(__name__)
    log.debug('Showing debug messages.')

    # A missing (None) path used to slip past this check and crash later in
    # ``open(None)``; treat it the same as a non-existent path.
    if args.file_list is None or not os.path.exists(args.file_list):
        log.error('Invalid file list path: %s', args.file_list)
        sys.exit(103)

    def check_image(image_path):
        # Returns ``(is_valid, element)``; ``(False, False)`` marks a path
        # that does not exist.
        if not os.path.exists(image_path):
            log.warning('Invalid image path given (does not exist): %s',
                        image_path)
            return False, False
        d = DataFileElement(image_path)
        return is_valid_element(d, check_image=True), d

    with open(args.file_list) as infile:
        checked_images = parallel.parallel_map(check_image,
                                               map(str.strip, infile),
                                               name='check-image-validity',
                                               use_multiprocessing=True)

        for is_valid, dfe in checked_images:
            if not dfe:
                # Falsy element: the path did not exist.
                continue
            # Print when validity and the invert flag disagree, i.e. valid
            # images normally and invalid ones under ``--invert``.
            if bool(is_valid) != bool(args.invert):
                # We know the callback above is creating DataFileElement
                # instances.
                # noinspection PyProtectedMember
                print('%s,%s' % (dfe._filepath, dfe.uuid()))
Пример #18
0
def main():
    """
    Command-line entry point: build a ``SkLearnBallTreeHashIndex`` from a
    pickled hash-to-UUIDs table.

    Each item yielded by iterating the loaded table is converted with
    ``int_to_bit_vector_large(item, bit_len)`` and the resulting vectors are
    passed to ``build_index``.

    :raises AssertionError: If the table path is not an existing file, or the
        parent directory of the ball-tree model path does not exist.
    """
    args = cli_parser().parse_args()

    initialize_logging(logging.getLogger('smqtk'), logging.DEBUG)
    initialize_logging(logging.getLogger('__main__'), logging.DEBUG)
    log = logging.getLogger(__name__)

    hash2uuids_fp = os.path.abspath(args.hash2uuids_fp)
    bit_len = args.bit_len
    leaf_size = args.leaf_size
    rand_seed = args.rand_seed
    balltree_model_fp = os.path.abspath(args.balltree_model_fp)

    # Explicit raises rather than ``assert`` so the path checks still run
    # under ``python -O``; the exception type is unchanged.
    if not os.path.isfile(hash2uuids_fp):
        raise AssertionError("Bad path: '%s'" % hash2uuids_fp)
    if not os.path.isdir(os.path.dirname(balltree_model_fp)):
        raise AssertionError("Bad path: %s" % balltree_model_fp)

    log.debug("hash2uuids_fp    : %s", hash2uuids_fp)
    log.debug("bit_len          : %d", bit_len)
    log.debug("leaf_size        : %d", leaf_size)
    log.debug("rand_seed        : %d", rand_seed)
    log.debug("balltree_model_fp: %s", balltree_model_fp)

    log.info("Loading hash2uuids table")
    # NOTE(review): unpickling runs arbitrary code; only load a trusted file.
    with open(hash2uuids_fp) as f:
        hash2uuids = cPickle.load(f)

    log.info("Computing hash-code vectors")
    hash_vectors = []
    # State list handed to ``report_progress`` on every iteration; its layout
    # is defined by that helper.
    rs = [0] * 7
    for h in hash2uuids:
        hash_vectors.append(int_to_bit_vector_large(h, bit_len))
        report_progress(log.debug, rs, 1.)

    log.info("Initializing ball tree")
    btree = SkLearnBallTreeHashIndex(balltree_model_fp, leaf_size, rand_seed)

    log.info("Building ball tree")
    btree.build_index(hash_vectors)
Пример #19
0
def main():
    """
    Command-line entry point: run one of the web applications returned by
    ``smqtk.web.get_web_applications()``.

    With ``args.list`` the available application labels are logged and the
    program exits with status 0.  Otherwise the application named by
    ``args.application`` is built from configuration via
    ``app_class.from_config`` and started with ``app.run``.

    Exits with status 1 when no application name is given or the name is not
    among the available applications.
    """
    args = cli_parser().parse_args()

    debug_smqtk = args.debug_smqtk or args.verbose
    debug_server = args.debug_server or args.verbose

    # The debug flags are booleans: True lowers INFO to DEBUG for the first
    # two loggers and WARN to INFO for ``werkzeug``.
    bin_utils.initialize_logging(logging.getLogger("__main__"),
                                 logging.INFO - (10 * debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("smqtk"),
                                 logging.INFO - (10 * debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 logging.WARN - (20 * debug_server))
    log = logging.getLogger(__name__)

    web_applications = smqtk.web.get_web_applications()

    if args.list:
        log.info("")
        log.info("Available applications:")
        log.info("")
        for label in web_applications:
            log.info("\t" + label)
        log.info("")
        exit(0)

    application_name = args.application

    if application_name is None:
        log.error("No application name given!")
        exit(1)
    elif application_name not in web_applications:
        log.error("Invalid application label '%s'", application_name)
        exit(1)

    app_class = web_applications[application_name]

    # Keep the helper's return value: ``config`` was previously never bound,
    # so the ``from_config(config)`` call below raised NameError.
    # NOTE(review): assumes utility_main_helper returns the loaded
    # configuration -- confirm against bin_utils.
    config = bin_utils.utility_main_helper(app_class.get_default_config,
                                           args,
                                           skip_logging_init=True)

    host = args.host
    port = args.port and int(args.port)
    use_reloader = args.reload
    use_threading = args.threaded
    use_basic_auth = args.use_basic_auth

    # noinspection PyUnresolvedReferences
    app = app_class.from_config(config)
    if use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = debug_server

    app.run(host=host,
            port=port,
            debug=debug_server,
            use_reloader=use_reloader,
            threaded=use_threading)
Пример #20
0

def add_descriptors_smallcodes():
    """
    Compute small-codes for the pickled descriptor UUIDs and add them to the
    code index of the algorithm returned by ``load_algo()``.

    :return: Tuple of the loaded descriptor UUIDs and the loaded index
        object.
    """
    log = logging.getLogger(__name__)

    log.info("Loading descriptor UUIDs")
    with open(UUIDS_FILEPATH) as uuid_file:
        descriptor_uuids = cPickle.load(uuid_file)

    log.info("Loading ITQ components")
    rotation = np.load(
        "/data/shared/memex/ht_image_cnn/itq_model/16-bit/rotation.npy"
    )
    mean_vec = np.load(
        "/data/shared/memex/ht_image_cnn/itq_model/16-bit/mean_vec.npy"
    )

    log.info("Making small-codes")
    elements = make_elements_from_uuids(descriptor_uuids)
    code_descr_pairs = async_compute_smallcodes(rotation, mean_vec, elements)

    log.info("Loading ITQ model")
    itq_index = load_algo()

    log.info("Adding small codes")
    # Reaches into the index's private code index to add the pairs in bulk.
    itq_index._code_index.add_many_descriptors(code_descr_pairs)

    return descriptor_uuids, itq_index


if __name__ == "__main__":
    # Script entry point: initialize the root logger at DEBUG level, then run
    # the small-code ingest.
    initialize_logging(logging.getLogger(), logging.DEBUG)
    # NOTE: despite its name, ``filenames`` receives the descriptor UUIDs,
    # which are the first value returned by add_descriptors_smallcodes().
    filenames, itq_index = add_descriptors_smallcodes()
Пример #21
0
# Labels file name; passed to IndexLabelClassifier at the end of this section.
CAFFE_LABELS = "labels.txt"

# CSV file detailing [cluster_id, ad_id, image_sha1] relationships.
EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv"
# json-lines file of clusters missing from the above file. Should be at least
# composed of: {"cluster_id": <str>, ... }
EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl"

# Output file names. OUTPUT_DESCR_PROB_INDEX is the argument given to
# MemoryDescriptorIndex below; the two ``.jl`` names are not used within this
# section.
OUTPUT_DESCR_PROB_INDEX = "cp1_img_prob_descriptors.pickle"
OUTPUT_MAX_JL = "cp1_scores_max.jl"
OUTPUT_AVG_JL = "cp1_scores_avg.jl"

###############################################################################

# Compute classification scores
# NOTE: everything below runs at import time (module-level statements).
initialize_logging(logging.getLogger('smqtk'), logging.DEBUG)

# EVAL_DATASET and the CAFFE_DEPLOY / CAFFE_MODEL / CAFFE_IMG_MEAN names used
# below are defined outside this section.
eval_data_set = DataMemorySet(EVAL_DATASET)
img_prob_descr_index = MemoryDescriptorIndex(OUTPUT_DESCR_PROB_INDEX)

# Descriptor generator configured with the 'prob' layer name, batches of 1000,
# GPU use enabled and truncated-image loading enabled.
img_prob_gen = CaffeDescriptorGenerator(CAFFE_DEPLOY,
                                        CAFFE_MODEL,
                                        CAFFE_IMG_MEAN,
                                        'prob',
                                        batch_size=1000,
                                        use_gpu=True,
                                        load_truncated_images=True)

# Factory for MemoryClassificationElement instances, given an empty config
# dictionary.
img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement,
                                                 {})
img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS)
Пример #22
0
if __name__ == "__main__":
    # Script entry point: parse arguments, set up logging, then overlay the
    # user's configuration file on the defaults.
    p = cli_parser()
    args = p.parse_args()

    debug = args.debug
    config_fp = args.config
    out_config_fp = args.gen_config
    completed_files_fp = args.completed_files
    filelist_fp = args.file_list
    batch_size = args.batch_size

    # Initialize logging
    # DEBUG when ``debug`` is truthy, otherwise INFO (and/or idiom).
    llevel = debug and logging.DEBUG or logging.INFO
    # Only initialize a logger that has no handlers attached yet.
    if not logging.getLogger("smqtk").handlers:
        initialize_logging(logging.getLogger("smqtk"), llevel)
    if not logging.getLogger("__main__").handlers:
        initialize_logging(logging.getLogger("__main__"), llevel)

    l = logging.getLogger(__name__)

    # Merge loaded config with default
    config_loaded = False
    c = default_config()
    if config_fp:
        if os.path.isfile(config_fp):
            with open(config_fp) as f:
                # The file text is passed through jsmin() before JSON parsing.
                c.update(json.loads(jsmin(f.read())))
            config_loaded = True
        else:
            # An invalid path is only logged here; nothing in this section
            # exits, and config_loaded stays False.
            l.error("Config file path not valid")
Пример #23
0
def main():
    """
    Command-line entry point: compute a descriptor vector for one data file.

    The first positional argument is the input file path; extra paths are
    ignored with a warning.  The vector is saved with ``numpy.save`` when
    ``-o/--output-filepath`` is given, otherwise it is printed to standard
    out as space-separated ``%15f`` fields.

    Exits with status 1 when no configuration path is given, the path is not
    an existing file, no input file path is given, or no descriptor vector
    could be generated.
    """
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration")
    group_labels.add_option('-c', '--config',
                            default=None,
                            help='Path to the JSON configuration file.')
    group_labels.add_option('--output-config',
                            default=None,
                            help='Optional path to output default JSON '
                                 'configuration to.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('--overwrite',
                              action='store_true', default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was discovered "
                                   "based on the given content descriptor type "
                                   "and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output feature '
                                   'vector to. Otherwise the feature vector is '
                                   'printed to standard out. Output is saved '
                                   'in numpy binary format (.npy suffix '
                                   'recommended).')
    group_optional.add_option('-v', '--verbose',
                              action='store_true', default=False,
                              help='Print additional debugging messages. All '
                                   'logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    bin_utils.output_config(opts.output_config, default_config(), log)

    if not opts.config:
        log.error("No configuration provided")
        exit(1)
    elif not os.path.isfile(opts.config):
        log.error("Configuration file path not valid.")
        exit(1)

    if not args:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    with open(opts.config, 'r') as f:
        config = json.load(f)

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    factory = DescriptorElementFactory.from_config(config['descriptor_factory'])
    #: :type: smqtk.descriptor_generator.DescriptorGenerator
    cd = plugin.from_plugin_config(config['content_descriptor'],
                                   get_descriptor_generator_impls)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        log.error("Failed to generate a descriptor vector for the input data!")
        # Stop here: continuing would save ``None`` or fail iterating it.
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Construct the string by hand rather than relying on numpy's own
        # array formatting.  The parenthesized single-argument ``print``
        # behaves the same as a statement (Python 2) and a function call.
        # noinspection PyTypeChecker
        print(' '.join('%15f' % value for value in vec))
Пример #24
0
def main():
    """
    Run, or list, an SMQTK web application from the command line.

    Options are parsed with ``bin_utils.SMQTKOptParser``. With ``-l/--list``
    the names of the entries in ``smqtk.web.APPLICATIONS`` are logged and the
    process exits with status 0. Otherwise the attribute of ``smqtk.web``
    named by ``-a/--application`` is instantiated with the ``--config`` value
    and run with the requested host, port, reloader and threading settings.

    :raises ValueError: No application name was given, or ``smqtk.web`` has
        no attribute of that name.
    """
    parser = bin_utils.SMQTKOptParser()
    parser.add_option('-c', '--config', default=None,
                      help='Path to an smqtk configuration extension file '
                           '(a python file).')
    parser.add_option('-a', '--application', default=None,
                      help="Name of the web application to run. Required.")

    parser.add_option('-r', '--reload', action='store_true', default=False,
                      help='Turn on server reloading.')
    parser.add_option('-t', '--threaded', action='store_true', default=False,
                      help="Turn on web searcher threading.")
    parser.add_option('--debug-server', action='store_true', default=False,
                      help='Turn on server debugging messages')
    parser.add_option('--debug-backend', action='store_true', default=False,
                      help='Turn on smqtk backend debugging messages')

    parser.add_option('--host', default=None,
                      help="Run host address specification override. This will "
                           "override all other configuration method "
                           "specifications.")
    parser.add_option('--port', default=None,
                      help="Run port specification override. This will "
                           "override all other configuration method "
                           "specifications.")
    parser.add_option("--use-basic-auth", action="store_true", default=False,
                      help="Use global basic authentication as configured.")
    parser.add_option('-l', '--list', default=False, action="store_true",
                      help="List currently available applications for running.")
    opts, args = parser.parse_args()

    # Each debug flag drops its logger to DEBUG; the defaults are INFO for
    # smqtk and WARN for werkzeug.
    bin_utils.initialize_logging(
        logging.getLogger("smqtk"),
        logging.DEBUG if opts.debug_backend else logging.INFO)
    bin_utils.initialize_logging(
        logging.getLogger("werkzeug"),
        logging.DEBUG if opts.debug_server else logging.WARN)
    log = logging.getLogger("smqtk.main")

    if opts.list:
        from smqtk.web import APPLICATIONS
        for header_line in ("", "Available applications:", ""):
            log.info(header_line)
        for app_type in APPLICATIONS:
            log.info("\t%s" % app_type.__name__)
        log.info("")
        exit(0)

    # Only convert the port override to an integer when one was given.
    port = int(opts.port) if opts.port else opts.port

    if opts.application is None:
        raise ValueError("No application name given!")

    import smqtk.web
    # Look the application class up by name on the smqtk.web package.
    app_type = getattr(smqtk.web, opts.application, None)
    if app_type is None:
        raise ValueError("No available application by the name of '%s'"
                         % opts.application)

    app = app_type(opts.config)
    if opts.use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = opts.debug_server

    app.run(host=opts.host, port=port, debug=opts.debug_server,
            use_reloader=opts.reload, threaded=opts.threaded)
Пример #25
0
import json
import logging

from smqtk import algorithms
from smqtk import representation
from smqtk.utils import bin_utils, jsmin, plugin


__author__ = '*****@*****.**'


#
# Setup logging
#
# Logging is only initialized (at INFO level) when the root logger has no
# handlers attached yet.
#
if not logging.getLogger().handlers:
    bin_utils.initialize_logging(logging.getLogger(), logging.INFO)


#
# Input parameters
#
# The JSON configurations referenced below configure the data structures and
# algorithms needed for the IQR demo application. Change the values to suit
# your specific data and algorithm needs.
#
# See the algorithm implementation doc-strings (implementation class
# ``__init__`` methods) for more information on configuration parameters.
#
search_app_config_filepath = (
    "/Users/purg/dev/smqtk/source/python/smqtk/web/"
    "search_app/config.IqrSearchApp.json"
)
Пример #26
0
from smqtk.utils.file_utils import safe_create_dir
from smqtk.utils.parallel import parallel_map


################################################################################
# PARAMETERS
#
# File and directory names used by this script.

# Confirmed there are no conflicting truth labels on a CDR and URL basis
ad_image_csv = "ad-images.source.url_ad_label.csv"
ad_phone_csv = "ad-images.source.ad_phone.csv"
image_output_dir = "ad-images"

################################################################################


# Initialize INFO-level logging for both the "__main__" and "smqtk" loggers,
# then grab this module's own logger.
initialize_logging(logging.getLogger("__main__"), logging.INFO)
initialize_logging(logging.getLogger("smqtk"), logging.INFO)
log = logging.getLogger(__name__)


# Remove the '.jfif' and '.jpe' extension mappings from the mimetypes table.
# ``pop`` with a default is a no-op when the key is already absent.
mimetypes.types_map.pop('.jfif', None)
mimetypes.types_map.pop('.jpe', None)


def dl_ad_image(url, output_dir):
    """
    Returns (None, None, None) if failed, otherwise (url, filepath, sha1)

    NOTE(review): only the logger lookup is visible in this excerpt; the
    download logic that would produce the documented return values is not
    shown here -- confirm against the full implementation.

    :param url: presumably the URL of the image to download -- TODO confirm.
    :param output_dir: presumably the directory the image is written under
        -- TODO confirm.
    """
    log = logging.getLogger(__name__)
Пример #27
0
import csv
import json
import logging

from matplotlib import pyplot as plt
import numpy
from sklearn.metrics import auc, confusion_matrix, precision_recall_curve, roc_curve

from smqtk.algorithms import get_classifier_impls
from smqtk.representation import ClassificationElementFactory
from smqtk.representation.classification_element.memory import MemoryClassificationElement
from smqtk.representation.descriptor_index.memory import MemoryDescriptorIndex
from smqtk.utils.bin_utils import initialize_logging
from smqtk.utils.plugin import from_plugin_config

# Initialize INFO-level logging on the root logger and grab this module's
# logger.
initialize_logging(logging.getLogger(), logging.INFO)
log = logging.getLogger(__name__)

###############################################################################
# Parameters
#
# File names used by this script.
#
PHONE_SHA1_JSON = "eval.map.phone2shas.json"
DESCRIPTOR_INDEX_FILE_CACHE = "eval.images.descriptors.alexnet_fc7.index"

CLASSIFIER_TRAINING_CONFIG_JSON = "ad-images.final.cmv.train.json"

PHONE2SCORE_OUTPUT_FILEPATH = "eval.results.full_model.phone2score.csv"

# Optional: only needed for ROC generation, which takes
# PHONE2SCORE_OUTPUT_FILEPATH as input and outputs plots.
PHONE2TRUTH = "eval.source.phone2truth.json"
Пример #28
0
def main():
    """
    Compute a feature vector for one input file and save or print it.

    With ``-l/--list`` the labels reported by
    ``ContentDescriptorConfiguration.available_labels()`` are logged and the
    process exits with status 0. Otherwise the first positional argument is
    wrapped in a ``DataFileElement`` and handed to the descriptor created for
    the ``-c/--content-descriptor`` label. The result is written with
    ``numpy.save`` when ``-o/--output-filepath`` is given, else printed to
    standard out as space-separated ``%15f`` values.

    Exits with status 1 when no input file path is provided.
    """
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = (
        "Compute a feature vector for a given data file, outputting "
        "the generated feature vector to standard out, or to an "
        "output file if one was specified.\n"
        "\n"
        "An ingest "
        "configuration must be specified for the purpose of "
        "identifying which model files to use (assuming a given "
        "descriptor has/uses model files). The ingest configuration "
        "also informs where to put temporary working files. "
    )
    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-c', '--content-descriptor',
                      help='The descriptor type to use. This must be a type '
                           'available in system configuration')
    parser.add_option('-o', '--output-filepath',
                      help='Optional path to a file to output feature vector '
                           'to. Otherwise the feature vector is printed to '
                           'standard out. Output is saved in numpy binary '
                           'format (.npy suffix recommended).')
    parser.add_option('-l', '--list', action='store_true', default=False,
                      help='List available descriptor types.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Print additional debugging messages. All logging '
                           'goes to standard error.')
    opts, args = parser.parse_args()

    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), log_level)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for label in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", label)
        log.info("")
        exit(0)

    if not args:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    # Only the first positional argument is used as the input file.
    data_element = DataFileElement(args[0])

    generator = ContentDescriptorConfiguration.new_inst(
        opts.content_descriptor)
    feature = generator.compute_descriptor(data_element)

    if opts.output_filepath:
        numpy.save(opts.output_filepath, feature)
    else:
        # Build the output string by hand rather than relying on numpy's own
        # array formatting.
        print(' '.join('%15f' % value for value in feature))
Пример #29
0
def main():
    """
    Compute a descriptor vector for one input file and save or print it.

    The JSON file given by ``args.config`` is merged over
    ``default_config()``. The ``descriptor_factory`` and
    ``content_descriptor`` sections of the merged configuration are used to
    build the descriptor element factory and the descriptor generator, which
    is then applied to ``args.input_file``. The vector is written with
    ``numpy.save`` when ``args.output_filepath`` is set, otherwise it is
    printed to standard out as space-separated ``%15f`` values.

    Exits with status 1 when the configuration path is missing or invalid,
    when the input file is missing or not a file, or when no descriptor
    vector could be generated.
    """
    parser = cli_parser()
    args = parser.parse_args()

    output_filepath = args.output_filepath
    overwrite = args.overwrite
    verbose = args.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    # Merge loaded config with default
    config_loaded = False
    config = default_config()
    if args.config:
        if not os.path.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))
        config_loaded = True

    # NOTE(review): presumably writes the configuration out and exits when an
    # output path was requested -- confirm in bin_utils.output_config.
    bin_utils.output_config(args.output_config, config, log, True)

    # Configuration must have been loaded at this point since we can't normally
    # trust the default.
    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    if not args.input_file:
        log.error("Failed to provide an input file path")
        exit(1)
    elif not os.path.isfile(args.input_file):
        log.error("Given path does not point to a file.")
        exit(1)

    input_filepath = args.input_file
    data_element = DataFileElement(input_filepath)

    factory = DescriptorElementFactory.from_config(config['descriptor_factory'])
    #: :type: smqtk.algorithms.descriptor_generator.DescriptorGenerator
    cd = plugin.from_plugin_config(config['content_descriptor'],
                                   get_descriptor_generator_impls())
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        # Stop here: continuing would either save ``None`` to the output file
        # or fail with a TypeError while iterating ``None`` below.
        log.error("Failed to generate a descriptor vector for the input data!")
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Build the output string by hand rather than relying on numpy's own
        # array formatting.
        # noinspection PyTypeChecker
        print(' '.join('%15f' % f for f in vec))
Пример #30
0
def main():
    """
    Run, or list, an SMQTK web application from the command line.

    With ``args.list`` the labels returned by
    ``smqtk.web.get_web_applications()`` are logged (together with each
    class's docstring when smqtk debugging is enabled) and the process exits
    with status 0. Otherwise the class registered under ``args.application``
    is built from the configuration returned by
    ``bin_utils.utility_main_helper`` and run.

    Exits with status 1 when no application name is given or the name is not
    a known application label.
    """
    parser = cli_parser()
    args = parser.parse_args()

    debug_smqtk = args.debug_smqtk or args.verbose
    debug_server = args.debug_server or args.verbose

    # A set debug flag lowers the corresponding logger thresholds
    # (INFO -> DEBUG for smqtk/__main__, WARN -> DEBUG for werkzeug).
    bin_utils.initialize_logging(logging.getLogger("__main__"),
                                 logging.INFO - (10 * debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("smqtk"),
                                 logging.INFO - (10 * debug_smqtk))
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 logging.WARN - (20 * debug_server))
    log = logging.getLogger(__name__)

    web_applications = smqtk.web.get_web_applications()

    if args.list:
        log.info("")
        log.info("Available applications:")
        log.info("")
        for label, cls in six.iteritems(web_applications):
            log.info("\t" + label)
            if debug_smqtk:
                # ``__doc__`` is None for a class without a docstring; fall
                # back to an empty string so the concatenation cannot raise
                # a TypeError.
                log.info('\t' + ('^' * len(label)) + '\n' +
                         (cls.__doc__ or '') + '\n' +
                         ('*' * 80) + '\n')
        log.info("")
        exit(0)

    application_name = args.application

    if application_name is None:
        log.error("No application name given!")
        exit(1)
    elif application_name not in web_applications:
        log.error("Invalid application label '%s'", application_name)
        exit(1)

    #: :type: smqtk.web.SmqtkWebApp
    app_class = web_applications[application_name]

    # Logging was already configured above, so the helper is told to skip it.
    config = bin_utils.utility_main_helper(app_class.get_default_config, args,
                                           skip_logging_init=True)

    host = args.host
    # Only convert the port override to an integer when one was given.
    port = args.port and int(args.port)
    use_reloader = args.reload
    use_threading = args.threaded
    use_basic_auth = args.use_basic_auth

    # noinspection PyUnresolvedReferences
    #: :type: smqtk.web.SmqtkWebApp
    app = app_class.from_config(config)
    if use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = debug_server

    log.info("Starting application")
    app.run(host=host, port=port, debug=debug_server, use_reloader=use_reloader,
            threaded=use_threading)
Пример #31
0
def main():
    """
    Generate model files for a content descriptor and, optionally, an indexer.

    With ``-l/--list`` the available data set, content descriptor and indexer
    labels are logged and the process exits with status 0. Otherwise the
    given labels are checked against the available configurations (exit
    status 1 on any mismatch), the data set and descriptor are instantiated,
    and the descriptor's model is generated. When an indexer label was given,
    descriptors are computed for the data set and passed to the indexer's
    model generation.

    A custom system configuration JSON file (``--sys-json``) replaces
    ``ConfigurationInterface.BASE_CONFIG`` with its ``Ingests`` section.
    """
    import optparse
    description = (
        "Generate the model for the given indexer type, using features "
        "from the given feature descriptor type. We use configured valued in "
        "the smqtk_config module and from the system configuration JSON file "
        "(etc/system_config.json) unless otherwise specified by options to "
        "this script. Specific ingest used is determined by the ingest type "
        "provided (-t/--type)."
    )
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")
    group_required.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")

    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messaged to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    # The verbose flag drops the root logger from INFO to DEBUG.
    bin_utils.initialize_logging(
        logging.getLogger(),
        logging.DEBUG if opts.verbose else logging.INFO)
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Install the custom JSON configuration if one was given.
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            custom_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = custom_config['Ingests']

    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for label in DataSetConfiguration.available_labels():
            log.info("\t%s" % label)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for label in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s" % label)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for label in IndexerConfiguration.available_labels():
            log.info("\t%s", label)
        log.info("")
        exit(0)

    # Validate every given label before exiting, so that all problems are
    # reported in a single run.
    labels_ok = True
    if dset_label and \
            dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        labels_ok = False
    if cd_label and \
            cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        labels_ok = False
    if idxr_label and \
            idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        labels_ok = False
    if not labels_ok:
        exit(1)

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor.
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Indexer model generation only happens when an indexer type was provided.
    if not idxr_label:
        return

    log.info("Loading indexer instance...")
    #: :type: smqtk.indexing.Indexer
    indexer = IndexerConfiguration.new_inst(idxr_label)

    # The descriptor may or may not compute in parallel internally; request
    # serial operation so that multiple high-level computation jobs can run
    # without being overrun with threads.
    descriptor.PARALLEL = 1
    # NonDaemonicPool is used because a content descriptor that does parallel
    # processing might use multiprocessing.Pool instances too. Pools don't
    # usually allow daemonic processes, so this custom top-level pool allows
    # worker processes to spawn pools themselves.
    feature_map = descriptor.compute_descriptor_async(
        dset,
        parallel=parallel,
        pool_type=NonDaemonicPool
    )

    indexer.generate_model(feature_map, parallel=parallel)
Пример #32
0
def main():
    """
    Run, or list, an SMQTK web application from the command line.

    With ``opts.list`` the labels returned by
    ``smqtk.web.get_web_applications()`` are logged and the process exits
    with status 0. Otherwise the class registered under ``opts.application``
    is built from the JSON configuration file given by ``opts.config`` and
    run.

    Exits with status 1 when the application name is missing or unknown, or
    when the configuration path is missing or not a file.
    """
    parser = bin_utils.SMQTKOptParser()
    setup_cli(parser)
    opts, args = parser.parse_args()

    smqtk_debug = opts.debug_smqtk
    server_debug = opts.debug_server

    # A set debug flag lowers the corresponding logger threshold
    # (INFO -> DEBUG for smqtk, WARN -> DEBUG for werkzeug).
    bin_utils.initialize_logging(logging.getLogger("smqtk"),
                                 logging.INFO - 10 * smqtk_debug)
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 logging.WARN - 20 * server_debug)
    log = logging.getLogger("smqtk.main")

    web_applications = smqtk.web.get_web_applications()

    if opts.list:
        for header_line in ("", "Available applications:", ""):
            log.info(header_line)
        for app_label in web_applications:
            log.info("\t" + app_label)
        log.info("")
        exit(0)

    app_label = opts.application
    if app_label is None:
        log.error("No application name given!")
        exit(1)
    if app_label not in web_applications:
        log.error("Invalid application label '%s'", app_label)
        exit(1)

    app_class = web_applications[app_label]

    # Output config and exit if requested
    bin_utils.output_config(opts.output_config, app_class.get_default_config(),
                            log, opts.overwrite)

    if not opts.config:
        log.error("No configuration provided")
        exit(1)
    if not os.path.isfile(opts.config):
        log.error("Configuration file path not valid.")
        exit(1)

    # The file content is passed through jsmin before JSON parsing.
    with open(opts.config, 'r') as config_file:
        config = json.loads(jsmin(config_file.read()))

    # Only convert the port override to an integer when one was given.
    port = opts.port and int(opts.port)

    # noinspection PyUnresolvedReferences
    app = app_class.from_config(config)
    if opts.use_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = server_debug

    app.run(host=opts.host, port=port, debug=server_debug,
            use_reloader=opts.reload, threaded=opts.threaded)
Пример #33
0
import json
import logging

from smqtk import algorithms
from smqtk import representation
from smqtk.utils import bin_utils, jsmin, plugin


__author__ = '*****@*****.**'


#
# Setup logging
#
# Logging is only initialized (at DEBUG level) when the root logger has no
# handlers attached yet; the module logger below is created either way.
#
if not logging.getLogger().handlers:
    bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")


#
# Input parameters
#
# The following dictionaries are JSON configurations that are used to
# configure the various data structures and algorithms needed for the IQR demo
# application. Values here can be changed to suit your specific data and
# algorithm needs.
#
# See algorithm implementation doc-strings for more information on configuration
# parameters (see implementation class ``__init__`` method).
#
search_app_config_filepath = "/Users/purg/dev/smqtk/source/python/smqtk/web/" \
Пример #34
0
            #for line in chunk_file:
            #    fpath = line.rstrip()
            #    log.debug("Async processing filepath: %s", fpath)
            #    pool.apply_async(process_file, args=(fpath,))

            file_paths = [line.rstrip() for line in chunk_file]
            pool.map(process_file, file_paths)

            pool.close()
            pool.join()
            del pool

            mark_stage(stage_label)
        else:
            log.info("'%s' already complete", stage_label)

        stage_label = osp.basename(chunk_file_path)+'-cleanup'
        if not check_stage(stage_label):
            log.info("Cleaning work tree for chunk '%s'", chunk_file_path)
            if osp.isdir(CLEAN_WORK_DIR):
                shutil.rmtree(CLEAN_WORK_DIR)
            mark_stage(stage_label)
        else:
            log.info("'%s' already complete", stage_label)


# Script entry point: initialize root logging at INFO level, then call run().
if __name__ == '__main__':
    initialize_logging(logging.getLogger(), logging.INFO)
    run()
Пример #35
0
def main():
    """
    Compute a descriptor vector for one input file and save or print it.

    With ``-l/--list`` the available content descriptor and descriptor
    element factory labels are logged and the process exits with status 0.
    Otherwise the first positional argument is wrapped in a
    ``DataFileElement`` and a descriptor is computed with the instances
    created for the ``-c`` and ``-f`` labels. The vector is written with
    ``numpy.save`` when ``-o/--output-filepath`` is given, otherwise it is
    printed to standard out as space-separated ``%15f`` values.

    Exits with status 1 when no input file path is provided or when no
    descriptor vector could be generated.
    """
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration Labels")
    group_labels.add_option('-c', '--content-descriptor',
                            help='The descriptor type to use. This must be a '
                                 'type available in the system configuration')
    group_labels.add_option('-f', '--factory-type',
                            help='The DescriptorElementFactory configuration '
                                 'to use when computing the descriptor. This '
                                 'must be a type available in the system '
                                 'configuration.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help='List available descriptor types.')
    group_optional.add_option('--overwrite',
                              action='store_true', default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was discovered "
                                   "based on the given content descriptor type "
                                   "and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output feature '
                                   'vector to. Otherwise the feature vector is '
                                   'printed to standard out. Output is saved '
                                   'in numpy binary format (.npy suffix '
                                   'recommended).')
    group_optional.add_option('-v', '--verbose',
                              action='store_true', default=False,
                              help='Print additional debugging messages. All '
                                   'logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_label = opts.content_descriptor
    factory_label = opts.factory_type
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        log.info("Available DescriptorElementFactory types:")
        log.info("")
        for df in DescriptorFactoryConfiguration.available_labels():
            log.info("\t%s", df)
        log.info("")
        exit(0)

    if not args:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    # Only the first positional argument is used as the input file.
    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    cd = ContentDescriptorConfiguration.new_inst(descriptor_label)
    factory = DescriptorFactoryConfiguration.new_inst(factory_label)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        # Stop here: continuing would either save ``None`` to the output file
        # or fail with a TypeError while iterating ``None`` below.
        log.error("Failed to generate a descriptor vector for the input data!")
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Build the output string by hand rather than relying on numpy's own
        # array formatting.
        # noinspection PyTypeChecker
        print(' '.join('%15f' % f for f in vec))
Пример #36
0
def main():
    """
    Build the models needed by one IQR tab of the search application.

    The search application configuration file (``args.config``) is run
    through ``jsmin`` and parsed as JSON, and the ``iqr_tabs`` entry at index
    ``args.tab`` is used to construct the data set, descriptor generator,
    nearest-neighbor index and relevancy index. The input files (or glob
    patterns) in ``args.input_files`` are added to the data set, descriptors
    are computed for it, and both indexes are built from those descriptors.

    Exits with status 1 when ``args.tab`` is not a valid tab index.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    # Only initialized when the root logger has no handlers attached yet.
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    iqr_tabs = search_app_config["iqr_tabs"]
    if args.tab < 0 or args.tab > (len(iqr_tabs) - 1):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = iqr_tabs[args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config[
        'descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls(
    )["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Anything that is not an existing file is treated as a glob
            # pattern.
            log.debug("Expanding glob: %s", fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    try:
        nn_index.build_index(six.itervalues(data2descriptor))
    except RuntimeError as ex:
        # Treated as "model already built": skip this step, but log it so a
        # genuine failure is not completely invisible.
        log.info("Skipping nearest-neighbor index build (RuntimeError: %s)",
                 ex)

    rel_index.build_index(six.itervalues(data2descriptor))
Пример #37
0
def main():
    """
    Compute the descriptor vector of a single input file.

    Parses the command line, merges the JSON configuration file given there
    over ``default_config()``, builds the configured descriptor generator and
    descriptor element factory, and computes the descriptor of the input
    file.  The vector is written with ``numpy.save`` when an output file path
    was given; otherwise it is printed to standard output as a single line
    of space-separated ``%15f`` formatted values.

    Exits with status 1 when the configuration path is not a file, no
    configuration was loaded, the input file is missing or not a file, or no
    descriptor vector could be produced.
    """
    parser = cli_parser()
    args = parser.parse_args()

    output_filepath = args.output_filepath
    overwrite = args.overwrite
    verbose = args.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    # Merge loaded config with default
    config_loaded = False
    config = default_config()
    if args.config:
        if not os.path.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(args.config, 'r') as f:
            config.update(json.load(f))
        config_loaded = True

    # NOTE(review): presumably writes the merged configuration (and exits)
    # when an output path was requested -- confirm against
    # ``bin_utils.output_config``.
    bin_utils.output_config(args.output_config, config, log, True)

    # Configuration must have been loaded at this point since we can't normally
    # trust the default.
    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    if not args.input_file:
        log.error("Failed to provide an input file path")
        exit(1)
    elif not os.path.isfile(args.input_file):
        log.error("Given path does not point to a file.")
        exit(1)

    input_filepath = args.input_file
    data_element = DataFileElement(input_filepath)

    factory = DescriptorElementFactory.from_config(
        config['descriptor_factory'])
    #: :type: smqtk.algorithms.descriptor_generator.DescriptorGenerator
    cd = plugin.from_plugin_config(config['content_descriptor'],
                                   get_descriptor_generator_impls)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        # There is nothing to save or print; stop here instead of writing a
        # ``None`` object to disk or failing while iterating over it.
        log.error("Failed to generate a descriptor vector for the input data!")
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Build the output line by hand instead of printing the numpy array
        # directly.  The parenthesised single-argument form of ``print``
        # behaves the same under Python 2 and Python 3.
        # noinspection PyTypeChecker
        print(' '.join('%15f' % f for f in vec))
Пример #38
0
def main():
    """
    Run one of the web applications reported by
    ``smqtk.web.get_web_applications()``.

    With the list option set, the available application labels are logged
    and the process exits with status 0.  Otherwise the requested
    application's default configuration is overlaid with the JSON file given
    on the command line, the application is built from that configuration
    and its ``run`` method is invoked with the host, port, reloader and
    threading options from the command line.

    Exits with status 1 when the application label is missing or unknown,
    when the configuration path is not a file, or when no configuration file
    was loaded.
    """
    opts = cli_parser().parse_args()

    smqtk_debug = opts.debug_smqtk
    server_debug = opts.debug_server

    # Each debug flag lowers the threshold of the corresponding logger.
    smqtk_level = logging.INFO - (10 * smqtk_debug)
    bin_utils.initialize_logging(logging.getLogger("smqtk"), smqtk_level)
    werkzeug_level = logging.WARN - (20 * server_debug)
    bin_utils.initialize_logging(logging.getLogger("werkzeug"),
                                 werkzeug_level)
    log = logging.getLogger("smqtk.main")

    available_apps = smqtk.web.get_web_applications()

    if opts.list:
        for header_line in ("", "Available applications:", ""):
            log.info(header_line)
        for label in available_apps:
            log.info("\t" + label)
        log.info("")
        exit(0)

    app_label = opts.application
    if app_label is None:
        log.error("No application name given!")
        exit(1)
    if app_label not in available_apps:
        log.error("Invalid application label '%s'", app_label)
        exit(1)

    app_cls = available_apps[app_label]

    # Start from the application's defaults and overlay the user's file.
    have_user_config = False
    settings = app_cls.get_default_config()
    if opts.config:
        if not os.path.isfile(opts.config):
            log.error("Configuration file path not valid.")
            exit(1)
        with open(opts.config, 'r') as config_file:
            settings.update(json.load(config_file))
        have_user_config = True

    # Output config and exit if requested
    bin_utils.output_config(opts.output_config, settings, log, opts.overwrite)

    # The defaults alone are not trusted to be runnable, so a configuration
    # file must have been loaded by now.
    if not have_user_config:
        log.error("No configuration provided")
        exit(1)

    # Collect the server options before the application is constructed.
    run_kwargs = {
        'host': opts.host,
        'port': opts.port and int(opts.port),
        'debug': server_debug,
        'use_reloader': opts.reload,
        'threaded': opts.threaded,
    }
    basic_auth_requested = opts.use_basic_auth

    # noinspection PyUnresolvedReferences
    app = app_cls.from_config(settings)
    if basic_auth_requested:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = server_debug

    app.run(**run_kwargs)
Пример #39
0
if __name__ == "__main__":
    # Script entry point: parse the command line, set up logging and merge
    # the user-supplied configuration file over the defaults.
    # NOTE(review): this snippet ends right after the configuration merge;
    # how the values gathered below are consumed is not visible here.
    p = cli_parser()
    args = p.parse_args()

    # Copy the command line options into module-level names.
    debug = args.debug
    config_fp = args.config
    out_config_fp = args.gen_config
    completed_files_fp = args.completed_files
    filelist_fp = args.file_list
    batch_size = args.batch_size

    # Initialize logging
    # DEBUG level when the debug flag is set, INFO otherwise.
    llevel = debug and logging.DEBUG or logging.INFO
    # Only initialize a logger that has no handlers attached yet.
    if not logging.getLogger('smqtk').handlers:
        initialize_logging(logging.getLogger('smqtk'), llevel)
    if not logging.getLogger('__main__').handlers:
        initialize_logging(logging.getLogger('__main__'), llevel)

    l = logging.getLogger(__name__)

    # Merge loaded config with default
    config_loaded = False
    c = default_config()
    if config_fp:
        if os.path.isfile(config_fp):
            with open(config_fp) as f:
                # The file content is passed through ``jsmin`` before JSON
                # parsing (presumably to strip comments/whitespace -- confirm).
                c.update(json.loads(jsmin(f.read())))
            config_loaded = True
        else:
            # Only logged here; ``config_loaded`` stays False.
            l.error("Config file path not valid")
Пример #40
0
def main():
    """
    Build the models used by one tab of the IQR search application.

    Reads the search application's JSON configuration (passed through
    ``jsmin`` before parsing), selects the IQR tab given on the command
    line, instantiates the configured DataSet, DescriptorElementFactory,
    DescriptorGenerator, NearestNeighborsIndex and RelevancyIndex plugins,
    adds the input files (paths or globs) to the data set, generates a
    descriptor generator model when the implementation provides
    ``generate_model``, computes descriptors for the data set and builds the
    nearest-neighbor and relevancy indices from them.

    Exits with status 1 when the requested tab index is out of range.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        llevel = logging.DEBUG if args.verbose else logging.INFO
        bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    iqr_tabs = search_app_config["iqr_tabs"]
    if args.tab < 0 or args.tab >= len(iqr_tabs):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = iqr_tabs[args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config['descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls)

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls)

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls)

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Not an existing file: treat the argument as a glob pattern.
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific generation
    # methods stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    # ``values()`` is used instead of the Python-2-only ``itervalues()`` so
    # that this also runs under Python 3.
    try:
        nn_index.build_index(data2descriptor.values())
    except RuntimeError as ex:
        # Treated as "model already built", so this step is skipped; the
        # reason is logged rather than silently discarded.
        log.info("Skipping nearest-neighbor index build: %s", ex)

    rel_index.build_index(data2descriptor.values())
Пример #41
0
def main():
    """
    Generate the nearest-neighbor index model for one IQR UI tab.

    Two configuration files are loaded (UI and IQR service).  The tab named
    on the command line selects the data set configuration from the UI
    config, while the descriptor factory, descriptor generator and neighbor
    index configurations come from the IQR service's plugin section.  Input
    files (paths or globs) are added to the data set, a descriptor generator
    model is generated when the implementation offers ``generate_model``,
    descriptors are computed for the data set, the index's LSH functor is
    fit when it has one, and finally the nearest-neighbor index is built.

    Exits with status 1 when no tab or an unknown tab is given.

    :raises RuntimeError: One or both configuration files failed to load.
    """
    cli_args = cli_parser().parse_args()

    ui_cfg_path, iqr_cfg_path = cli_args.config
    if cli_args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    tab_key = cli_args.tab
    glob_patterns = cli_args.input_files

    # Not using `bin_utils.utility_main_helper`` due to deviating from single-
    # config-with-default usage.
    for logger_name in ('smqtk', '__main__'):
        bin_utils.initialize_logging(logging.getLogger(logger_name),
                                     log_level)
    log = logging.getLogger(__name__)

    log.info("Loading UI config: '{}'".format(ui_cfg_path))
    ui_cfg, ui_loaded = bin_utils.load_config(ui_cfg_path)
    log.info("Loading IQR config: '{}'".format(iqr_cfg_path))
    iqr_cfg, iqr_loaded = bin_utils.load_config(iqr_cfg_path)
    if not ui_loaded or not iqr_loaded:
        raise RuntimeError("One or both configuration files failed to load.")

    # The requested tab has to be present in the UI configuration.
    if tab_key is None:
        log.error("No configuration tab provided to drive model generation.")
        exit(1)
    tabs_cfg = ui_cfg["iqr_tabs"]
    if tab_key not in tabs_cfg:
        log.error("Invalid tab provided: '{}'. Available tags: {}"
                  .format(tab_key, list(tabs_cfg)))
        exit(1)

    #
    # Gather Configurations
    #
    log.info("Extracting plugin configurations")

    tab_cfg = tabs_cfg[tab_key]
    plugins_cfg = iqr_cfg['iqr_service']['plugins']

    # DataSet implementation and parameters come from the UI tab.
    ds_cfg = tab_cfg['data_set']
    # Which DescriptorElement implementation stores the generated vectors.
    factory_cfg = plugins_cfg['descriptor_factory']
    # DescriptorGenerator implementation, parameters and any persistent
    # model component locations.
    generator_cfg = plugins_cfg['descriptor_generator']
    # NearestNeighborIndex implementation, parameters and any persistent
    # model component locations.
    index_cfg = plugins_cfg['neighbor_index']

    #
    # Initialize data/algorithms
    #
    log.info("Instantiating plugins")
    #: :type: representation.DataSet
    data_set = plugin.from_plugin_config(
        ds_cfg, representation.get_data_set_impls())
    elem_factory = representation.DescriptorElementFactory.from_config(
        factory_cfg)
    #: :type: algorithms.DescriptorGenerator
    generator = plugin.from_plugin_config(
        generator_cfg, algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    neighbor_index = plugin.from_plugin_config(
        index_cfg, algorithms.get_nn_index_impls())

    #
    # Build models
    #
    log.info("Adding files to dataset '{}'".format(data_set))
    for pattern in glob_patterns:
        pattern = osp.expanduser(pattern)
        if osp.isfile(pattern):
            matches = (pattern,)
        else:
            log.debug("Expanding glob: %s" % pattern)
            matches = glob.iglob(pattern)
        for path in matches:
            data_set.add_data(DataFileElement(path, readonly=True))

    # Generate a model when the generator exposes a generation method; a
    # missing method surfaces as AttributeError and is only logged.
    try:
        log.debug("descriptor generator as model to generate?")
        generator.generate_model(data_set)
    except AttributeError as err:
        log.debug("descriptor generator as model to generate - Nope: {}"
                  .format(str(err)))

    # Descriptors of the data feed the NN index below.
    log.info("Computing descriptors for data set with {}"
             .format(generator))
    descr_map = generator.compute_descriptor_async(data_set, elem_factory)

    # Optional support step before building the index: fit the LSH functor
    # when the index has one.
    try:
        log.debug("Has LSH Functor to fit?")
        neighbor_index.lsh_functor.fit(six.itervalues(descr_map))
    except AttributeError as err:
        log.debug("Has LSH Functor to fit - Nope: {}".format(str(err)))

    log.info("Building nearest neighbors index {}".format(neighbor_index))
    neighbor_index.build_index(six.itervalues(descr_map))
Пример #42
0
def main():
    """
    Command line entry point that starts a web application chosen from
    ``smqtk.web.get_web_applications()``.

    When listing is requested the known application labels are logged and
    the process exits with status 0.  Otherwise the selected application's
    default configuration is updated from the JSON file named on the command
    line, the application object is created from it (optionally wrapped with
    forced basic authentication) and ``run`` is called on it.

    Exits with status 1 for a missing/unknown application label, an invalid
    configuration file path, or when no configuration file was given.
    """
    cli = cli_parser().parse_args()

    verbose_smqtk = cli.debug_smqtk
    verbose_server = cli.debug_server

    # A set debug flag drops the logger threshold by one (smqtk) or two
    # (werkzeug) standard level steps.
    bin_utils.initialize_logging(
        logging.getLogger("smqtk"), logging.INFO - (10 * verbose_smqtk))
    bin_utils.initialize_logging(
        logging.getLogger("werkzeug"), logging.WARN - (20 * verbose_server))
    log = logging.getLogger("smqtk.main")

    app_registry = smqtk.web.get_web_applications()

    if cli.list:
        listing = ["", "Available applications:", ""]
        listing.extend("\t" + name for name in app_registry)
        listing.append("")
        for entry in listing:
            log.info(entry)
        exit(0)

    chosen = cli.application

    if chosen is None:
        log.error("No application name given!")
        exit(1)
    elif chosen not in app_registry:
        log.error("Invalid application label '%s'", chosen)
        exit(1)

    application_type = app_registry[chosen]

    # Overlay the user's JSON file on top of the application defaults.
    loaded_from_file = False
    app_config = application_type.get_default_config()
    if cli.config:
        if os.path.isfile(cli.config):
            with open(cli.config, 'r') as handle:
                app_config.update(json.load(handle))
            loaded_from_file = True
        else:
            log.error("Configuration file path not valid.")
            exit(1)

    # Output config and exit if requested
    bin_utils.output_config(cli.output_config, app_config, log, cli.overwrite)

    # Defaults alone are normally not trustworthy, so insist on a loaded
    # configuration file before going any further.
    if not loaded_from_file:
        log.error("No configuration provided")
        exit(1)

    bind_host = cli.host
    bind_port = cli.port and int(cli.port)
    reload_on_change = cli.reload
    threaded = cli.threaded
    force_basic_auth = cli.use_basic_auth

    # noinspection PyUnresolvedReferences
    app = application_type.from_config(app_config)
    if force_basic_auth:
        app.config["BASIC_AUTH_FORCE"] = True
        BasicAuth(app)
    app.config['DEBUG'] = verbose_server

    app.run(
        host=bind_host,
        port=bind_port,
        debug=verbose_server,
        use_reloader=reload_on_change,
        threaded=threaded,
    )