def run_classifier_labels(hdfs_input_pos, hdfs_input_neg, hdfs_output,
                          classifier_name, classifier_extra, local_labels,
                          classifier, **kw):
    """Collect pos/neg example keys and record them in the local labels file.

    Args:
        hdfs_output: Path to hdfs temporary output or None if execution
            should be performed locally using hadoopy.launch_local.
    """
    # Start from any labels already saved locally; a missing file is fine.
    try:
        labels = file_parse.load(local_labels)
    except IOError:
        labels = {}

    def _flatten_keys(kv_iter):
        # Each value is a list of keys; concatenate them into one flat list.
        return sum((value for _, value in kv_iter), [])

    if hdfs_output is None:
        job = hadoopy.launch_local(hdfs_input_pos, None, _lf('collect_keys.py'))
        pos_keys = _flatten_keys(job['output'])
        job = hadoopy.launch_local(hdfs_input_neg, None, _lf('collect_keys.py'))
        neg_keys = _flatten_keys(job['output'])
    else:
        hdfs_output_pos = hdfs_output + '/pos'
        hdfs_output_neg = hdfs_output + '/neg'
        picarus._launch_frozen(hdfs_input_pos, hdfs_output_pos, _lf('collect_keys.py'))
        picarus._launch_frozen(hdfs_input_neg, hdfs_output_neg, _lf('collect_keys.py'))
        pos_keys = _flatten_keys(hadoopy.readtb(hdfs_output_pos))
        neg_keys = _flatten_keys(hadoopy.readtb(hdfs_output_neg))

    labels[classifier_name] = {'labels': {'1': pos_keys, '-1': neg_keys},
                               'classifier': classifier,
                               'classifier_extra': classifier_extra}
    file_parse.dump(labels, local_labels)
def run_predict_classifier(hdfs_input, hdfs_classifier_input, hdfs_output,
                           classes=None, image_hashes=None, **kw):
    """Launch predict_classifier.py on hdfs_input using stored classifiers.

    Args:
        classes: Optional collection of class names to keep; None keeps all.
        image_hashes: Optional restriction forwarded to _launch_frozen.
    """
    import classipy
    # NOTE: Adds necessary files
    files = glob.glob(classipy.__path__[0] + "/lib/*")
    fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
    wanted = hadoopy.readtb(hdfs_classifier_input)
    if classes is not None:
        wanted = (kv for kv in wanted if kv[0] in classes)
    file_parse.dump(list(wanted), fp.name)
    files.append(fp.name)
    picarus._launch_frozen(hdfs_input, hdfs_output, _lf('predict_classifier.py'),
                           files=files, reducer=None,
                           cmdenvs=['CLASSIFIERS_FN=%s' % os.path.basename(fp.name)],
                           image_hashes=image_hashes,
                           dummy_arg=fp)  # keeps the temp file alive until launch
def run_predict_windows(hdfs_input, hdfs_classifier_input, feature, hdfs_output,
                        image_height, image_width, **kw):
    """Launch predict_windows.py on hdfs_input with the stored classifiers.

    Ships the classipy native libs, a pickled dump of the classifiers, and the
    Haar cascade data file along with the job.
    """
    import classipy
    # NOTE: Adds necessary files
    files = glob.glob(classipy.__path__[0] + "/lib/*")
    fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
    file_parse.dump(list(hadoopy.readtb(hdfs_classifier_input)), fp.name)
    files += [fp.name, _lf('data/haarcascade_frontalface_default.xml')]
    cmdenvs = ['CLASSIFIERS_FN=%s' % os.path.basename(fp.name),
               'IMAGE_HEIGHT=%d' % image_height,
               'IMAGE_WIDTH=%d' % image_width,
               'FEATURE=%s' % feature]
    picarus._launch_frozen(hdfs_input, hdfs_output, _lf('predict_windows.py'),
                           cmdenvs=cmdenvs, files=files,
                           dummy_arg=fp)  # keeps the temp file alive until launch
def run_classifier_labels(hdfs_input_pos, hdfs_input_neg, hdfs_output,
                          classifier_name, classifier_extra, local_labels,
                          classifier, **kw):
    """Gather positive/negative keys and store them under classifier_name.

    Args:
        hdfs_output: Path to hdfs temporary output or None if execution
            should be performed locally using hadoopy.launch_local.
    """
    labels = {}
    try:
        labels = file_parse.load(local_labels)
    except IOError:
        pass  # no existing labels file yet; start empty
    if hdfs_output is None:
        out = hadoopy.launch_local(hdfs_input_pos, None, _lf('collect_keys.py'))['output']
        pos_keys = [key for _, keys in out for key in keys]
        out = hadoopy.launch_local(hdfs_input_neg, None, _lf('collect_keys.py'))['output']
        neg_keys = [key for _, keys in out for key in keys]
    else:
        pos_path = hdfs_output + '/pos'
        neg_path = hdfs_output + '/neg'
        picarus._launch_frozen(hdfs_input_pos, pos_path, _lf('collect_keys.py'))
        picarus._launch_frozen(hdfs_input_neg, neg_path, _lf('collect_keys.py'))
        pos_keys = [key for _, keys in hadoopy.readtb(pos_path) for key in keys]
        neg_keys = [key for _, keys in hadoopy.readtb(neg_path) for key in keys]
    labels[classifier_name] = {
        'labels': {'1': pos_keys, '-1': neg_keys},
        'classifier': classifier,
        'classifier_extra': classifier_extra,
    }
    file_parse.dump(labels, local_labels)
def run_predict_classifier(hdfs_input, hdfs_classifier_input, hdfs_output,
                           classes=None, image_hashes=None, **kw):
    """Run the predict_classifier.py job, filtering classifiers by class name.

    Args:
        classes: Optional collection of class names to keep; None keeps all.
        image_hashes: Optional restriction forwarded to _launch_frozen.
    """
    import classipy
    # NOTE: Adds necessary files
    files = glob.glob(classipy.__path__[0] + "/lib/*")
    fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
    classifiers = [kv for kv in hadoopy.readtb(hdfs_classifier_input)
                   if classes is None or kv[0] in classes]
    file_parse.dump(classifiers, fp.name)
    files.append(fp.name)
    env = ['CLASSIFIERS_FN=%s' % os.path.basename(fp.name)]
    picarus._launch_frozen(hdfs_input, hdfs_output, _lf('predict_classifier.py'),
                           files=files, reducer=None, cmdenvs=env,
                           image_hashes=image_hashes,
                           dummy_arg=fp)  # keeps the temp file alive until launch
def _launch_frozen(in_path, out_path, script_path, jobconfs_default=(), *args, **kw):
    """Wrapper around hadoopy.launch_frozen with project-wide defaults applied.

    Freezes the script, defaults to a single reducer when none is requested,
    merges GLOBAL_JOBCONFS and jobconfs_default into the job's jobconfs
    (explicit jobconfs win over defaults), and, when image_hashes is given,
    ships the hashes as a pickled side file plus a cmdenv pointing at it.

    Args:
        in_path: HDFS input path.
        out_path: HDFS output path.
        script_path: Local path of the script to freeze and launch.
        jobconfs_default: Iterable of 'name=value' strings applied only when
            the caller did not set the same jobconf name.
        *args, **kw: Forwarded to hadoopy.launch_frozen.

    Returns:
        The result of hadoopy.launch_frozen.
    """
    import hadoopy
    import os
    kw = dict(kw)  # Make a copy as we will be mutating it
    kw['frozen_tar_path'] = _freeze_script(script_path)['frozen_tar_path']
    if 'reducer' not in kw and 'num_reducers' not in kw:
        kw['num_reducers'] = 1
    # Build a fresh list so we never alias GLOBAL_JOBCONFS or mutate the
    # caller's list.  (The original code had a dead
    # `if 'jobconfs' not in kw` branch here; the key is always set.)
    kw['jobconfs'] = list(kw.get('jobconfs', [])) + list(GLOBAL_JOBCONFS)
    if jobconfs_default:
        jobconfs_dict = dict(x.split('=', 1) for x in kw['jobconfs'])
        # Defaults only fill in names the caller/globals did not set.
        for name, value in (x.split('=', 1) for x in jobconfs_default):
            jobconfs_dict.setdefault(name, value)
        kw['jobconfs'] = ['%s=%s' % x for x in jobconfs_dict.items()]
    if kw.get('image_hashes') is not None:
        import tempfile
        fp = tempfile.NamedTemporaryFile(suffix='.pkl.gz')
        file_parse.dump(kw['image_hashes'], fp.name)
        # Copy-then-extend so the caller's 'files'/'cmdenvs' lists are not
        # mutated as a side effect of this call.
        kw['files'] = list(kw.get('files', [])) + [fp.name]
        kw['cmdenvs'] = list(kw.get('cmdenvs', [])) + [
            'PICARUS_VALID_IMAGE_HASHES=%s' % os.path.basename(fp.name)]
        kw['_internal_dummy_arg'] = fp  # Keep the object alive
    # Never forward image_hashes (even an explicit None) to launch_frozen.
    kw.pop('image_hashes', None)
    return hadoopy.launch_frozen(in_path, out_path, script_path, *args, **kw)
def save_classifiers(hdfs_classifier_path, classes, name):
    """Dump classifiers from HDFS to a local file, optionally filtered.

    Args:
        hdfs_classifier_path: HDFS path of (class_name, classifier) records.
        classes: Whitespace-delimited string of class names to keep, or None
            to keep everything.  NOTE(review): an empty string skips the
            set conversion and matches nothing — confirm that is intended.
        name: Local output filename for file_parse.dump.
    """
    if classes:
        classes = set(classes.split())
    selected = [kv for kv in hadoopy.readtb(hdfs_classifier_path)
                if classes is None or kv[0] in classes]
    file_parse.dump(selected, name)
def _report_clusters(hdfs_input, local_json_output, category, make_faces, **kw):
    """Build a cluster report from hdfs_input and write it to local_json_output."""
    file_parse.dump(report_clusters(hdfs_input, category, make_faces, **kw),
                    local_json_output)
def _report_video_keyframe(hdfs_input, local_json_output, **kw):
    """Build a video keyframe report and write it to local_json_output.

    NOTE(review): extra **kw are accepted but not forwarded to
    report_video_keyframe, matching the original behavior — confirm intended.
    """
    file_parse.dump(report_video_keyframe(hdfs_input), local_json_output)