def posterior_stash_to_jams(stash, penalty_values, output_directory, vocab,
                            model_params):
    """Decode a stash of posteriors to JAMS and write to disk.

    Parameters
    ----------
    stash : biggie.Stash
        Posteriors to decode.
    penalty_values : array_like
        Collection of penalty values with which to run Viterbi.
    output_directory : str
        Base path to write out JAMS files; each collection will be written
        as {output_directory}/{penalty_values[i]}.jamset
    vocab : dl4mir.chords.lexicon.Vocab
        Map from posterior indices to string labels.
    model_params : dict
        Metadata to associate with the annotation.
    """
    # Sweep over the given penalty values.
    for penalty in penalty_values:
        print "[{0}] \tStarting p = {1}".format(time.asctime(), penalty)
        results = decode_stash_parallel(stash, penalty, vocab, NUM_CPUS)
        output_file = os.path.join(
            output_directory, "{0}.jamset".format(penalty))
        jamset = dict()
        for key, annot in results.iteritems():
            annot.sandbox.update(timestamp=time.asctime(), **model_params)
            jam = pyjams.JAMS(chord=[annot])
            jam.sandbox.track_id = key
            jamset[key] = jam

        futils.create_directory(output_directory)
        util.save_jamset(jamset, output_file)


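# Hedged usage sketch (not part of the original module): the stash path,
# penalty grid, and model metadata below are illustrative assumptions showing
# how `posterior_stash_to_jams` might be driven from a script.
def _example_decode_sweep():
    # Assumed inputs: a stash of chord posteriors and the strict 157-class
    # lexicon used elsewhere in this codebase.
    stash = biggie.Stash("chord_posteriors.hdf5")
    vocab = lex.Strict(157)
    posterior_stash_to_jams(
        stash,
        penalty_values=[-40.0, -30.0, -20.0, -10.0],
        output_directory="./decoded_jams",
        vocab=vocab,
        model_params=dict(model_name="example_model", checkpoint="final"))

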
def main(stash_file, input_key, transform_file, param_file, output_file,
         verbose=True):
    """Load a saved transform and apply it to every entry in a stash."""
    transform = optimus.load(transform_file, param_file)
    stash = biggie.Stash(stash_file)
    futil.create_directory(os.path.split(output_file)[0])
    output = biggie.Stash(output_file)
    util.process_stash(stash, transform, output, input_key, verbose=verbose)


def main(args): """Main routine for importing data.""" futils.create_directory(path.split(args.output_file)[0]) if args.verbose: print "[%s] Creating: %s" % (time.asctime(), args.output_file) stash = biggie.Stash(args.output_file) populate_stash(futils.load_textlist(args.key_list), args.cqt_directory, args.lab_directory, stash, np.float32)
def main(args): """Main routine for importing data.""" futils.create_directory(path.split(args.output_file)[0]) if args.verbose: print "[%s] Creating: %s" % (time.asctime(), args.output_file) stash = biggie.Stash(args.output_file) populate_stash( futils.load_textlist(args.key_list), args.cqt_directory, args.lab_directory, stash, np.float32)
def main(args):
    """Compute the class prior of a stash and write it to a JSON file."""
    stash = biggie.Stash(args.input_file)
    futil.create_directory(path.split(args.output_file)[0])
    stats = dict()
    vocab = lex.Strict(157)
    stats['prior'] = D.class_prior_v157(stash, vocab).tolist()
    with open(args.output_file, 'w') as fp:
        json.dump(stats, fp)


def main(args):
    """Score a collection of jamsets against a reference set in parallel."""
    ref_jamset = jams_util.load_jamset(args.ref_jamset)
    jamset_files = futil.load_textlist(args.jamset_textlist)

    pool = Parallel(n_jobs=args.num_cpus)
    fx = delayed(score_one)
    results = pool(fx(ref_jamset, f, args.min_support) for f in jamset_files)
    results = {f: r for f, r in zip(jamset_files, results)}

    output_dir = os.path.split(args.output_file)[0]
    futil.create_directory(output_dir)
    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)


def main(args): """Main routine for importing data.""" data_splits = json.load(open(args.split_file)) output_file_fmt = path.join(args.output_directory, FILE_FMT) for fold in data_splits: for split in data_splits[fold]: output_file = output_file_fmt % (fold, split) futils.create_directory(path.split(output_file)[0]) if args.verbose: print "[%s] Creating: %s" % (time.asctime(), output_file) stash = biggie.Stash(output_file) populate_stash(data_splits[fold][split], args.cqt_directory, args.jams_directory, stash, np.float32)
def main(args):
    """Run a classification experiment over train/valid/test stashes and
    write the resulting scores to disk."""
    fpath = os.path.join(args.data_directory, "{0}.hdf5")
    train = biggie.Stash(fpath.format('train'), cache=True)
    valid = biggie.Stash(fpath.format('valid'), cache=True)
    test = biggie.Stash(fpath.format('test'), cache=True)

    results = classify(train, valid, test, num_train=50000, num_valid=10000,
                       num_test=25000)
    for k in 'train', 'valid', 'test':
        print("{0}: {1:.4}".format(k, results['{0}_score'.format(k)]))

    output_dir = os.path.split(args.stats_file)[0]
    futil.create_directory(output_dir)
    with open(args.stats_file, 'w') as fp:
        json.dump(results, fp, indent=2)


def main(textlist, dim0, dim1, output_directory, param_file, num_cpus=-1):
    """Apply Local Contrast Normalization to a collection of files.

    Parameters
    ----------
    textlist : str
        A text list of npz filepaths.
    dim0 : int
        First dimension of the filter kernel (time).
    dim1 : int
        Second dimension of the filter kernel (frequency).
    output_directory : str
        Directory to save output arrays.
    param_file : str
        Filename for saving the parameters used, written inside
        `output_directory`.
    num_cpus : int, default=-1
        Number of CPUs over which to parallelize computations.
    """
    # Set the kernel globally.
    PARAMS[KERNEL] = create_kernel(dim0, dim1)

    output_dir = futil.create_directory(output_directory)
    with open(os.path.join(output_dir, param_file), "w") as fp:
        json.dump({"dim0": dim0, "dim1": dim1}, fp, indent=2)

    pool = Parallel(n_jobs=num_cpus)
    dlcn = delayed(apply_lcn)
    iterargs = futil.map_path_file_to_dir(textlist, output_dir, EXT)
    return pool(dlcn(x) for x in iterargs)


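# Hedged usage sketch (not part of the original module): the text list, kernel
# shape, and output paths below are illustrative assumptions for calling the
# LCN `main` routine above.
def _example_lcn_run():
    # Normalize every npz array listed in "cqt_files.txt" with an 11x11
    # (time x frequency) kernel, writing the arrays and the parameter record
    # under "./lcn_output".
    return main(textlist="cqt_files.txt", dim0=11, dim1=11,
                output_directory="./lcn_output",
                param_file="lcn_params.json", num_cpus=2)

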
def main(args):
    sim_margin = -RADIUS * args.margin
    trainer, predictor, zerofilter = models.iX_c3f2_oY(20, 3, 'xlarge')
    time_dim = trainer.inputs['cqt'].shape[2]
    if args.init_param_file:
        print("Loading parameters: {0}".format(args.init_param_file))
        trainer.load_param_values(args.init_param_file)

    print("Opening {0}".format(args.training_file))
    stash = biggie.Stash(args.training_file, cache=True)
    stream = S.minibatch(
        D.create_pairwise_stream(stash, time_dim,
                                 working_size=100, threshold=0.05),
        batch_size=BATCH_SIZE)
    stream = D.batch_filter(
        stream, zerofilter, threshold=2.0**-16, min_batch=1,
        max_consecutive_skips=100, sim_margin=sim_margin, diff_margin=RADIUS)

    print("Starting '{0}'".format(args.trial_name))
    driver = optimus.Driver(
        graph=trainer,
        name=args.trial_name,
        output_directory=futil.create_directory(args.output_directory))

    hyperparams = dict(
        learning_rate=LEARNING_RATE,
        sim_margin=sim_margin, diff_margin=RADIUS)

    predictor_file = path.join(driver.output_directory, args.predictor_file)
    optimus.save(predictor, def_file=predictor_file)
    driver.fit(stream, hyperparams=hyperparams, **DRIVER_ARGS)


def main(args):
    """Fit a PCA-LDA predictor to sampled data and save its definition and
    parameters to disk."""
    predictor = pca_lda_graph(20, args.n_components, 3)
    input_shape = list(predictor.inputs['cqt'].shape)
    time_dim = input_shape[2]
    input_shape[0] = args.num_points

    print("Opening {0}".format(args.training_file))
    stash = biggie.Stash(args.training_file, cache=True)
    stream = D.create_labeled_stream(stash, time_dim,
                                     working_size=1000, threshold=0.05)

    print("Starting '{0}'".format(args.trial_name))
    data, labels = np.zeros(input_shape), []
    for idx, x in enumerate(stream):
        data[idx, ...] = x.cqt
        labels.append(x.label)
        if len(labels) == args.num_points:
            break
        elif (len(labels) % PRINT_FREQ) == 0:
            print("[{0}] {1:5} / {2:5}"
                  "".format(time.asctime(), len(labels), args.num_points))

    predictor.param_values = fit_params(data, labels, args.n_components, 3)

    output_directory = futil.create_directory(args.output_directory)
    predictor_file = path.join(output_directory, args.predictor_file)
    param_file = predictor_file.replace(".json", ".npz")
    optimus.save(predictor, def_file=predictor_file, param_file=param_file)


def main(args):
    filepaths = glob.glob(
        path.join(args.audio_directory, "*.%s" % args.ext.strip(".")))
    if path.exists(args.output_file):
        result = json.load(open(args.output_file))
        print "File exists: Found %d results" % len(result)
    else:
        futil.create_directory(path.split(args.output_file)[0])
        result = dict()

    result = fetch_data(filepaths, result=result, overwrite=False,
                        checkpoint_file=args.output_file)

    with open(args.output_file, 'w') as fp:
        json.dump(result, fp, indent=2)


def main(args): """Main routine for importing data.""" partitions = json.load(open(args.split_file)) output_file_fmt = path.join(args.output_directory, FILE_FMT) for set_name, subset in partitions.items(): for fold_idx, splits in subset.items(): for split, keys in splits.items(): output_file = output_file_fmt.format( subset=set_name, fold_idx=fold_idx, split=split) futil.create_directory(path.split(output_file)[0]) if args.verbose: print("[{0}] Creating: {1}" "".format(time.asctime(), output_file)) stash = biggie.Stash(output_file) populate_stash(keys, args.cqt_directory, stash, np.float32) stash.close()
def main(textlist, output_directory, cqt_params=None, num_cpus=-1):
    """Compute CQTs for a text list of audio files in parallel."""
    if cqt_params:
        DEFAULT_PARAMS.update(json.load(open(cqt_params)))

    output_dir = futil.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus)
    dcqt = delayed(audio_file_to_cqt)
    iterargs = futil.map_path_file_to_dir(textlist, output_dir, EXT)
    return pool(dcqt(x) for x in iterargs)


def main(args): """Main routine for importing data.""" partitions = json.load(open(args.split_file)) output_file_fmt = path.join(args.output_directory, FILE_FMT) for set_name, subset in partitions.items(): for fold_idx, splits in subset.items(): for split, keys in splits.items(): output_file = output_file_fmt.format(subset=set_name, fold_idx=fold_idx, split=split) futil.create_directory(path.split(output_file)[0]) if args.verbose: print("[{0}] Creating: {1}" "".format(time.asctime(), output_file)) stash = biggie.Stash(output_file) populate_stash(keys, args.cqt_directory, stash, np.float32) stash.close()
def main(args):
    """Apply a series of parameter checkpoints to a validation stash, writing
    one output stash per checkpoint."""
    param_files = futils.load_textlist(args.param_textlist)
    param_files.sort()
    param_files = param_files[args.start_index::args.stride]
    transform = optimus.load(args.transform_file)

    stash = biggie.Stash(args.validation_file, cache=True)
    output_dir = futils.create_directory(args.output_dir)
    for fidx, param_file in enumerate(param_files):
        transform.load_param_values(param_file)
        output_file = params_to_output_file(param_file, output_dir)
        futils.create_directory(os.path.split(output_file)[0])
        if os.path.exists(output_file):
            os.remove(output_file)
        output = biggie.Stash(output_file)
        util.process_stash(stash, transform, output, args.field,
                           verbose=args.verbose)


def main(args):
    """Import lab-file chord annotations into a jamset and write it to disk."""
    metadata = dict()
    if args.annotation_metadata:
        metadata.update(json.load(open(args.annotation_metadata)))

    jamset = dict()
    for key, lab_files in json.load(open(args.annotation_set)).items():
        jamset[key] = pyjams.JAMS()
        for f in [lab_files]:
            intervals, labels = mir_eval.io.load_labeled_intervals(str(f))
            annot = jamset[key].chord.create_annotation()
            pyjams.util.fill_range_annotation_data(
                intervals[:, 0], intervals[:, 1], labels, annot)
            annot.annotation_metadata.update(**metadata.get(key, {}))
            annot.sandbox.source_file = f
            annot.sandbox.timestamp = time.asctime()

    futil.create_directory(os.path.split(args.output_file)[0])
    util.save_jamset(jamset, args.output_file)


def main(args):
    """Score estimated annotations against a reference jamset and save the
    per-metric results to disk."""
    ref_jamset = jams_util.load_jamset(args.ref_jamset)
    est_jamset = jams_util.load_jamset(args.est_jamset)

    keys = est_jamset.keys()
    keys.sort()
    ref_annots = [ref_jamset[k].chord[0] for k in keys]
    est_annots = [est_jamset[k].chord[0] for k in keys]

    scores, supports = EVAL.score_annotations(ref_annots, est_annots, METRICS)
    results = dict(metrics=METRICS,
                   score_annotations=(scores.tolist(), supports.tolist()))

    scores_macro = scores.mean(axis=0)
    scalar = supports.sum(axis=0)
    scalar[scalar == 0] = 1.0
    scores_micro = (supports * scores).sum(axis=0) / scalar

    print tabulate.tabulate(
        [['macro'] + scores_macro.tolist(),
         ['micro'] + scores_micro.tolist()],
        headers=[''] + METRICS)

    label_counts = EVAL.reduce_annotations(ref_annots, est_annots, METRICS)
    mac_aves = []
    for m in METRICS:
        (labels, scores, support) = EVAL.macro_average(label_counts[m],
                                                       True, 0.0)
        mac_aves.append([labels, scores.tolist(), support.tolist()])

    results.update(macro_average=mac_aves)
    output_dir = os.path.split(args.output_file)[0]
    futil.create_directory(output_dir)
    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)


def main(args):
    arch_key = args.arch_size
    if args.dropout:
        arch_key += '_dropout'
    trainer, predictor = models.MODELS[arch_key]()
    time_dim = trainer.inputs['data'].shape[2]
    if args.init_param_file:
        print "Loading parameters: %s" % args.init_param_file
        trainer.load_param_values(args.init_param_file)

    print "Opening %s" % args.training_file
    stash = biggie.Stash(args.training_file, cache=True)
    stream = D.create_chord_index_stream(stash, time_dim, max_pitch_shift=0,
                                         lexicon=VOCAB)

    # Load prior
    stat_file = "%s.json" % path.splitext(args.training_file)[0]
    prior = np.array(json.load(open(stat_file))['prior'], dtype=float)
    trainer.nodes['prior'].weight.value = 1.0 / prior.reshape(1, -1)

    stream = S.minibatch(stream, batch_size=BATCH_SIZE)

    print "Starting '%s'" % args.trial_name
    driver = optimus.Driver(
        graph=trainer,
        name=args.trial_name,
        output_directory=futil.create_directory(args.output_directory))

    hyperparams = dict(learning_rate=LEARNING_RATE)
    if args.dropout:
        hyperparams.update(dropout=args.dropout)

    predictor_file = path.join(driver.output_directory, args.predictor_file)
    optimus.save(predictor, def_file=predictor_file)
    driver.fit(stream, hyperparams=hyperparams, **DRIVER_ARGS)