def main(argv):
    """Generate the segments for one configuration and write them to disk."""
    args = Configuration.parse_args(argv)
    configs = list(Configuration.ArgsIter(args))
    if len(configs) > 1:
        print("Unit testing currently supports only one configuration possibility")
    config = Configuration.Configuration.fromJSONDict(configs[0])
    # Please don't put colons in your data_file names if you want this to work
    # We interpert "filea:fileb" as ["filea", "fileb"]
    segments_module = importlib.import_module('persistence.' + config['data_type'])
    segments_class = getattr(segments_module, config['data_type'])
    # Constructing the class generates all the segments to save to disk.
    segments = segments_class(config)
    if 'outfile' in args[0] and args[0]['outfile'] is not None:
        outfile = args[0]['outfile']
    else:
        outfile = segments_class.get_segment_filename(segments.config)
    print("Writing %s" % outfile)
    # Unless told otherwise, don't rewrite the output, because disk IO and gzip are slow
    reevaluate = segments.config.reevaluate
    force_rewrite = (isinstance(reevaluate, dict)
                     and 'segments' in reevaluate.keys()
                     and reevaluate['segments'] == True)
    if force_rewrite or not os.path.isfile(outfile):
        save_data(outfile, segments.toJSONDict())
def main(argv):
    """Run a cross-validation sweep over kernel / distance / learning parameters."""
    parser = argparse.ArgumentParser(description="General purpose cross validation tool")
    parser.add_argument("--kernel-module", "-K")
    parser.add_argument("--kernel-arg", "-k")
    parser.add_argument("--distances-module", "-D")
    parser.add_argument("--distances-arg", "-d")
    parser.add_argument("--learning-module", "-L")
    parser.add_argument("--learning-arg", "-l")
    parser.add_argument("--infile", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--train-test-partitions", "-t")
    parser.add_argument("--pool", "-p", type=int,
                        default=max(1, multiprocessing.cpu_count() - 2))
    parser.add_argument("--timeout", type=int, default=0)
    args = parser.parse_args(argv[1:])

    input_json = load_data(args.infile, "input", None, None, argv[0] + ":")
    partitions_json = load_data(args.train_test_partitions, "input", None, None, argv[0] + ":")
    partitions = TrainTestPartitions.fromJSONDict(partitions_json)

    pool = multiprocessing.Pool(args.pool) if args.pool > 1 else None

    def _optional_range(text):
        # Absent command-line flags stay None; present ones are parsed as float ranges.
        return parse_range(text, t=float) if text is not None else None

    kernel_arg = _optional_range(args.kernel_arg)
    distances_arg = _optional_range(args.distances_arg)
    learning_arg = _optional_range(args.learning_arg)
    print("Kernel %s distance %s learning %s" % (kernel_arg, distances_arg, learning_arg))

    cv = CrossValidation(input_json,
                         config=Configuration.fromJSONDict(input_json['config']),
                         kernel_module=args.kernel_module,
                         kernel_arg=kernel_arg,
                         distances_module=args.distances_module,
                         distances_arg=distances_arg,
                         learning_module=args.learning_module,
                         learning_arg=learning_arg,
                         partitions=partitions,
                         pool=pool,
                         timeout=args.timeout)
    cv.cross_validate()

    if args.outfile is None:
        args.outfile = CrossValidation.get_cross_validation_filename(cv.config)
    print("Writing %s" % args.outfile)
    save_data(args.outfile, cv.toJSONDict())
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.
import sys
import argparse
import importlib
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a PersistenceDiagrams file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])

    diagrams_json = load_data(args.infile, "persistence diagrams", None, None, sys.argv[0] + " : ")
    diagrams = PersistenceDiagrams.fromJSONDict(diagrams_json)

    # Resolve the datatype class named in the configuration so we can derive
    # the segment file that pairs with these diagrams.
    data_module = importlib.import_module('persistence.' + diagrams.config.data_type)
    data_class = getattr(data_module, diagrams.config.data_type)
    segment_filename = data_class.get_segment_filename(diagrams.config)
    seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ")

    # Diagrams and segments are treated as parallel sequences; annotate pairwise.
    for (diagram, segment) in zip(diagrams.diagrams, seg_json['segments']):
        diagram.segment_info = SegmentInfo.fromJSONDict(segment)

    print("Writing %s" % (args.outfile,))
    save_data(args.outfile, diagrams.toJSONDict())
# NOTE(review): fragment of a larger routine — in_obj, file_class, module_class,
# status, and get_filename are defined by enclosing code not visible here.
if status != None :
    in_obj.config.status = status
# Dispatch on the class name carried in file_class to pick the canonical
# (uncompressed) output filename for this datatype.  Branch order matters:
# e.g. "AverageKernel" must be tested before the bare "Kernel" substring,
# and "Learning" catches "KernelLearning" before "Kernel" can.
if "Segments" in file_class or \
   "Post" in file_class :
    out_file = module_class.get_segment_filename(in_obj.config, gz=False)
elif "Features" in file_class :
    out_file = module_class.get_features_filename(in_obj.config, gz=False)
elif "PersistenceDiagrams" in file_class :
    out_file = module_class.get_persistence_diagrams_filename(in_obj.config, gz=False)
elif "Partition" in file_class :
    out_file = module_class.get_partition_filename(in_obj.config, gz=False)
elif "Learning" in file_class :
    out_file = module_class.get_learning_filename(in_obj.config, gz=False)
elif "Distances" in file_class or \
     "ScaleSpaceSimilarity" in file_class :
    out_file = module_class.get_distances_filename(in_obj.config, gz=False)
elif "AverageKernel" in file_class :
    out_file = get_filename(in_obj.config,
                            ['max_simplices', 'persistence_epsilon', 'segment_filename',
                             'segment_stride', 'segment_size', 'window_size', 'window_stride',
                             'kernel_scale', 'kernel_gamma', 'invariant_epsilon',
                             'data_file', 'data_index', 'label_index', 'persistence_degree',
                             'data_type', 'post_process', 'post_process_arg'], "AverageKernel")
elif "Kernel" in file_class :
    out_file = module_class.get_kernel_filename(in_obj.config, gz=False)
elif "CrossValidation" in file_class :
    out_file = module_class.get_cross_validation_filename(in_obj.config, gz=False)
# NOTE(review): if no branch matches, out_file is unbound and the print below
# raises NameError — confirm file_class is always one of the known kinds.
print "Writing %s" % (out_file,)
save_data(out_file, in_obj.toJSONDict())
# NOTE(review): fragment — the argparse.ArgumentParser(description= call that
# this string completes opens outside this view, and the final command list is
# truncated mid-expression at the end of the chunk.
    "Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate"
)
parser.add_argument("--config")
parser.add_argument("--pool", default=1, type=int)
args = parser.parse_args(sys.argv[1:])
# Feed the config file back through the shared parse_args helper to build the
# canonical Configuration object.
config = Configuration.fromJSONDict(
    parse_args([sys.argv[0], "--config", args.config])[0])
print config
# Resolve the datatype class named by the configuration; constructing it
# presumably generates the segments — confirm against the datatype modules.
module = importlib.import_module('persistence.' + config.data_type)
module_class = getattr(module, config.data_type)
segment_filename = module_class.get_segment_filename(config)
segments = module_class(config)
print "Writing %s" % segment_filename
save_data(segment_filename, segments.toJSONDict())
#TrainTestSplit
# Run the partitioning step in a subprocess on the file just written.
partition_command = [
    "python", "-u", "-O", "-m", "persistence.PartitionData", "--segments",
    segment_filename, "--learning-split",
    str(config.learning_split), "--learning-iterations",
    str(config.learning_iterations), "--cv-iterations", "5"
]
subprocess.call(partition_command)
partition_filename = TrainTestPartitions.get_partition_filename(config)
#PersistenceDiagrams
persistence_command = [
    "python", "-u", "-O", "-m", "persistence.PersistenceGenerator", "--pool",
# NOTE(review): fragment — the first statements here are the body of an
# enclosing "for s in samples:" loop whose header lies outside this view;
# output, out_data, pool, map_func, and args are also defined earlier.
    sample_data = [d for d in output if d['segment_start'] == s]
    segment_sizes = list(set([x['segment_size'] for x in sample_data]))
    segment_sizes.sort()
    for size in segment_sizes:
        segment_data = [
            d for d in sample_data if d['segment_size'] == size
        ]
        # Both distance objects wrap the same diagrams with pool=None —
        # presumably the actual computation happens later via map_func; confirm.
        w_distances = WassersteinDistances(
            None,
            PersistenceDiagrams(None, [
                PersistenceDiagram.fromJSONDict(d['diagram'])
                for d in segment_data
            ]),
            pool=None)
        distances = BottleneckDistances(
            None,
            PersistenceDiagrams(None, [
                PersistenceDiagram.fromJSONDict(d['diagram'])
                for d in segment_data
            ]),
            pool=None)
        out_data.append((segment_data, distances, w_distances))
goal = len(out_data)
computed = pool.imap(map_func, out_data)
done = []
for (c, i) in itertools.izip(computed, range(goal)):
    # Progress counter is zero-based: prints "0 of N" .. "N-1 of N".
    print "%d of %d" % (i, goal)
    done.append(c)
save_data(args.outfile, done)
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram


def avg(l):
    """Arithmetic mean of l, returned as a float even for int input."""
    return sum(l, 0.0) / len(l)


def average_density(diagram):
    """Mean local spacing of a diagram's degree-1 points.

    For each degree-1 point, averages its Euclidean distance to up to five
    other points (slice [1:6] of the in-order distance list), then averages
    those per-point values.  Returns 0.0 with fewer than three such points.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2:
        diagram_distances = []
        for (x0, y0) in points:
            # BUG FIX: the second squared term previously reused (x0 - x1),
            # collapsing the Euclidean distance to sqrt(2)*|x0 - x1|; it must
            # use the y coordinates.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # NOTE(review): [1:6] keeps five entries in input order, not the
            # five nearest — confirm whether a sort was intended here.
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else:
        return 0.0


if __name__ == "__main__":
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:]:
        pds = PersistenceDiagrams.fromJSONDict(
            load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))
# NOTE(review): fragment — output, pool, map_func, and args are defined by
# enclosing code outside this view.
# Sort by segment start, then by a density-like ratio, then by size.
# NOTE(review): under Python 2 these divisions are integer divisions if the
# fields are ints — confirm that is intended for the sort key.
output.sort(key=lambda x: (x['segment_start'],
                           x['max_simplices'] / x['segment_size'] / x['segment_size'],
                           x['segment_size']))
print "Distance Computation"
samples = list(set([d['segment_start'] for d in output]))
samples.sort()
out_data = []
# Group results by segment start, then by segment size, building one
# (segment_data, BottleneckDistances, WassersteinDistances) work item each.
for s in samples :
    sample_data = [d for d in output if d['segment_start'] == s]
    segment_sizes = list(set([x['segment_size'] for x in sample_data]))
    segment_sizes.sort()
    for size in segment_sizes :
        segment_data = [d for d in sample_data if d['segment_size'] == size]
        # pool=None here — presumably the distance work is done later by
        # map_func via the shared pool below; confirm.
        w_distances = WassersteinDistances(None, PersistenceDiagrams(None, [PersistenceDiagram.fromJSONDict(d['diagram']) for d in segment_data]), pool=None)
        distances = BottleneckDistances(None, PersistenceDiagrams(None, [PersistenceDiagram.fromJSONDict(d['diagram']) for d in segment_data]), pool=None)
        out_data.append((segment_data, distances, w_distances))
goal = len(out_data)
computed = pool.imap(map_func, out_data)
done = []
for (c,i) in itertools.izip(computed, range(goal)) :
    # Progress counter is zero-based: prints "0 of N" .. "N-1 of N".
    print "%d of %d" % (i, goal)
    done.append(c)
save_data(args.outfile, done)
# Driver script: generates segments for a configuration, then shells out to the
# partitioning and persistence-diagram steps.
if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate")
    parser.add_argument("--config")
    parser.add_argument("--pool", default=1, type=int)
    args = parser.parse_args(sys.argv[1:])
    # Feed the config file back through the shared parse_args helper to build
    # the canonical Configuration object.
    config = Configuration.fromJSONDict(parse_args([sys.argv[0], "--config", args.config])[0])
    print config
    # Resolve the datatype class named by the configuration; constructing it
    # presumably generates the segments — confirm against the datatype modules.
    module = importlib.import_module('persistence.' + config.data_type)
    module_class = getattr(module, config.data_type)
    segment_filename = module_class.get_segment_filename(config)
    segments = module_class(config)
    print "Writing %s" % segment_filename
    save_data(segment_filename, segments.toJSONDict())
    #TrainTestSplit
    # Run the partitioning step in a subprocess on the file just written.
    partition_command = ["python", "-u", "-O", "-m", "persistence.PartitionData",
                         "--segments", segment_filename,
                         "--learning-split", str(config.learning_split),
                         "--learning-iterations", str(config.learning_iterations),
                         "--cv-iterations", "5"]
    subprocess.call(partition_command)
    partition_filename = TrainTestPartitions.get_partition_filename(config)
    #PersistenceDiagrams
    persistence_command = ["python", "-u", "-O", "-m", "persistence.PersistenceGenerator",
                           "--pool", str(args.pool),
                           "--infile", segment_filename]
    subprocess.call(persistence_command)
    # NOTE(review): partition_filename is unused within this chunk — presumably
    # consumed by later steps outside this view.
# NOTE(review): fragment — the argparse.ArgumentParser(description= call that
# this string completes opens in code outside this view.
    "Creates windowed segments of a single dimension for a multidimensioned dataset"
)
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--data-index", default=0, type=int)
parser.add_argument("-w", "--window-size", type=int)
parser.add_argument("-W", "--window-stride", default=1, type=int)
args = parser.parse_args(sys.argv[1:])
segments = Segments.fromJSONDict(
    load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
# Samples are interleaved per dimension; pick out the requested dimension.
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]
for segment in segments.segments:
    # Slide a window of window_size along the chosen dimension, stepping by
    # window_stride, discarding windows that would run off the end.
    windows = [[
        segment.windows[0][(i + j) * dimensions + args.data_index]
        for j in range(args.window_size)
    ] for i in range(0, orig_window_size, args.window_stride)
               if ((i + args.window_size - 1) * dimensions + args.data_index) < len(segment.windows[0])]
    segment.data_index = segment.data_index[args.data_index]
    # NOTE(review): assigns the stride to window_size — confirm this is
    # intentional and matches what downstream consumers expect.
    segment.window_size = args.window_stride
    segment.windows = windows
segment_module = importlib.import_module("persistence." + segments.config.data_type)
segment_class = getattr(segment_module, segments.config.data_type)
segment_filename = segment_class.get_segment_filename(segments.config)
print "Writing " + segment_filename
save_data(segment_filename, segments.toJSONDict())
class segment_processing_callable: def __init__(self, outfile, max_simplices, epsilon, num_segments) : self.outfile = outfile self.max_simplices = max_simplices self.epsilon = epsilon self.num_segments = num_segments def __call__(self, (segment, index)) : print "Computing full rips filtration" start = time.clock() filtration = rips_filtration_generator(segment.windows, 2) persistence = PersistentHomology() full_persistence_diagram = persistence.compute_persistence_full(filtration, 2) full_runtime = time.clock() - start diagram_points = [] for p in range(full_persistence_diagram.num_pairs()) : pair = full_persistence_diagram.get_pair(p) diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()]) full_pd = PD(segment_start=segment.segment_start, labels=segment.labels, learning=segment.learning, filename=segment.filename, points=diagram_points) sparse_pds = [] if self.max_simplices != None : for m in self.max_simplices : print "Computing sparse rips filtration max_simplices %s" % (int(round(m)),) start = time.clock() filtration = sparse_rips_filtration_generator(segment.windows, int(round(m)), None, 2) persistence = PersistentHomology() sparse_persistence_diagram = persistence.compute_persistence_sparse(filtration, 2) sparsity = [filtration.get_simplex_sparsity(i) for i in [0,1,2]] sparse_runtime = time.clock() - start diagram_points = [] for p in range(sparse_persistence_diagram.num_pairs()) : pair = sparse_persistence_diagram.get_pair(p) diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()]) sparse_pd = PD(segment_start=segment.segment_start, labels=segment.labels, learning=segment.learning, filename=segment.filename, points=diagram_points) bottleneck = bottleneck_distance(full_pd.points, sparse_pd.points, 1) w1 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 1) w2 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 2) print "Distances: Bottleneck %g Wasserstein L1 %g L2 %g" % (bottleneck, w1, w2) 
sparse_pds.append(dict([("diagram", sparse_pd.toJSONDict()), ("max_simplices", int(round(m))), ("sparsity", sparsity), ("bottleneck_distance", bottleneck), ("wasserstein_l1", w1), ("wasserstein_l2", w2), ("runtime", sparse_runtime)])) else : for e in self.epsilon : print "Computing sparse rips filtration epsilon %s" % (e,) start = time.clock() filtration = sparse_rips_filtration_generator(segment.windows, None, e, 2) persistence = PersistentHomology() sparse_persistence_diagram = persistence.compute_persistence_sparse(filtration, 2) sparsity = [filtration.get_simplex_sparsity(i) for i in [0,1,2]] sparse_runtime = time.clock() - start diagram_points = [] for p in range(sparse_persistence_diagram.num_pairs()) : pair = sparse_persistence_diagram.get_pair(p) diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()]) sparse_pd = PD(segment_start=segment.segment_start, labels=segment.labels, learning=segment.learning, filename=segment.filename, points=diagram_points) bottleneck = bottleneck_distance(full_pd.points, sparse_pd.points, 1) w1 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 1) w2 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 2) print "Distances: Bottleneck %g Wasserstein L1 %g L2 %g" % (bottleneck, w1, w2) sparse_pds.append(dict([("diagram", sparse_pd.toJSONDict()), ("epsilon", e), ("sparsity", sparsity), ("bottleneck_distance", bottleneck), ("wasserstein_l1", w1), ("wasserstein_l2", w2), ("runtime", sparse_runtime)])) print "Saving data for segment %04d of %d to %s " % (index, self.num_segments, "%s.%04d" % (self.outfile, index)) save_data("%s.%04d" % (self.outfile, index), [dict([("full_diagram",full_pd.toJSONDict()), ("runtime", full_runtime)])] + sparse_pds) return full_pd.toJSONDict()
def main(argv):
    """Learn an SVM classifier from a precomputed similarity kernel.

    Optionally cross-validates over a range of C values to pick the best one,
    then evaluates on the evaluation partitions and writes a Learning JSON file.
    """
    parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C',
                        help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t', '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))

    kf_json = load_data(args['infile'], 'kernel', None, None, "KernelLearning: ")
    if kf_json == None:
        print("Could not load Kernel from %s" % (args['infile'],))
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info

    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None

    # Normalize learning_C: command-line value wins, else fall back to config.
    if args['learning_C'] != None:
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list):
            learning_C = [learning_C]
    elif not isinstance(config.learning_C, list):
        # BUG FIX: this branch previously tested `learning_C`, which is unbound
        # when --learning-C is omitted and raised NameError; the intent is to
        # fall back to the configuration's value, wrapped in a list.
        learning_C = [config.learning_C]
    else:
        learning_C = config.learning_C

    if args['train_test_partitions'] != None:
        partitions_json = load_data(args['train_test_partitions'], 'partitions',
                                    None, None, "KernelLearning: ")
        if partitions_json == None:
            print("Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],))
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        # Only generate cross-validation splits when there is a range of C to search.
        partitions = generate_partitions(config, segment_info,
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(partitions.cross_validation) > 0:
        # Cross-validate: score every C on every CV split and keep the best.
        learning_wrap = LearningWrapper(kernel)
        if pool != None:
            results = pool.map(learning_wrap,
                               itertools.product(partitions.cross_validation, learning_C))
        else:
            results = map(learning_wrap,
                          itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print(len(results))
        for C in learning_C:
            correct = Learning(config, [_result for (_C, _result) in results
                                        if C == _C]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print("KernelLearning: using C = %s, correct = %s" % (config.learning_C, max_correct))
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C

    # Final evaluation with the selected C on the evaluation partitions.
    learning_wrap = LearningWrapper(kernel)
    if pool != None:
        results = pool.map(learning_wrap,
                           itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(learning_wrap,
                      itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])

    if args['outfile'] == None:
        learning_filename = KernelLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']
    correct = learning.get_average_correct()
    print("%s correct %2.2f%% error %2.2f%% classes %s" %
          ("KernelLearning:", correct * 100.0, (1.0 - correct) * 100.0,
           len(set([s.max_label() for s in kernel.segment_info]))))
    print("Writing %s" % (learning_filename, ))
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
def main(argv):
    """Learn an SVM classifier from a precomputed similarity kernel.

    Optionally cross-validates over a range of C values to pick the best one,
    then evaluates on the evaluation partitions and writes a Learning JSON file.
    """
    parser = argparse.ArgumentParser(
        description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C',
                        help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t', '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))

    kf_json = load_data(args['infile'], 'kernel', None, None, "KernelLearning: ")
    if kf_json == None:
        print("Could not load Kernel from %s" % (args['infile'], ))
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info

    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None

    # Normalize learning_C: command-line value wins, else fall back to config.
    if args['learning_C'] != None:
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list):
            learning_C = [learning_C]
    elif not isinstance(config.learning_C, list):
        # BUG FIX: this branch previously tested `learning_C`, which is unbound
        # when --learning-C is omitted and raised NameError; the intent is to
        # fall back to the configuration's value, wrapped in a list.
        learning_C = [config.learning_C]
    else:
        learning_C = config.learning_C

    if args['train_test_partitions'] != None:
        partitions_json = load_data(args['train_test_partitions'], 'partitions',
                                    None, None, "KernelLearning: ")
        if partitions_json == None:
            print("Could not load Train / Test Partitions from %s" % (
                args['train_test_partitions'], ))
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        # Only generate cross-validation splits when there is a range of C to search.
        partitions = generate_partitions(config, segment_info,
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(
            partitions.cross_validation) > 0:
        # Cross-validate: score every C on every CV split and keep the best.
        learning_wrap = LearningWrapper(kernel)
        if pool != None:
            results = pool.map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        else:
            results = map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print(len(results))
        for C in learning_C:
            correct = Learning(
                config, [_result for (_C, _result) in results
                         if C == _C]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print("KernelLearning: using C = %s, correct = %s" % (
            config.learning_C, max_correct))
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C

    # Final evaluation with the selected C on the evaluation partitions.
    learning_wrap = LearningWrapper(kernel)
    if pool != None:
        results = pool.map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])

    if args['outfile'] == None:
        learning_filename = KernelLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']
    correct = learning.get_average_correct()
    print("%s correct %2.2f%% error %2.2f%% classes %s" % (
        "KernelLearning:", correct * 100.0, (1.0 - correct) * 100.0,
        len(set([s.max_label() for s in kernel.segment_info]))))
    print("Writing %s" % (learning_filename, ))
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
def compute(distance_type, distance_array, segment_compare, pool,
            max_simplices, epsilon, segments=None, pds=None, ds=None):
    """Build (or reuse) a pairwise diagram-distance matrix, then serve rows of it.

    Depending on which of segments / pds / ds is supplied: generates persistence
    diagrams from segments, or reuses pds; computes the full pairwise distance
    matrix unless ds already provides one.  Afterwards it loops forever,
    refreshing distance_array with the matrix row selected by segment_compare
    whenever that selection changes.  This function never returns.

    NOTE(review): distance_array and segment_compare appear to be shared
    multiprocessing objects (indexable array / .value holder) — confirm against
    the caller.  Indentation below is reconstructed from collapsed source;
    verify the branch nesting against the original file.
    """
    compute_pool = multiprocessing.Pool(pool)
    d_len = len(distance_array)
    d_rng = range(d_len)
    last = -1
    if pds == None and ds == None:
        # No diagrams supplied: generate them from the segments, streaming each
        # diagram's distance-to-the-first into distance_array as we go.
        persistence_diagrams = [None for x in segments.segments]
        print "Generating initial persistence diagram"
        persistence_diagrams[0] = PersistenceGenerator.process(
            (segments.segments[0], (max_simplices, epsilon)))
        diagram_generator = yieldPersistenceDiagramAndDistance(
            max_simplices, epsilon, persistence_diagrams[0], distance_type)
        results = compute_pool.imap(
            diagram_generator,
            itertools.izip(segments.segments[1:], d_rng[1:]))
        for (i, diagram, distance) in results:
            persistence_diagrams[i] = diagram
            distance_array[i] = distance
        config = segments.config
        config.max_simplices = max_simplices
        config.persistence_epsilon = epsilon
        diagrams = PersistenceDiagrams(config, persistence_diagrams)
        filename = PersistenceDiagrams.get_persistence_diagrams_filename(config)
        print "plot_persistence_distance.py: Writing %s" % (filename, )
        save_data(filename, diagrams.toJSONDict())
    elif pds != None:
        # Diagrams were precomputed; adopt them and their configuration.
        persistence_diagrams = pds.diagrams
        config = pds.config
    distances = [[None for y in d_rng] for x in d_rng]
    if ds == None:
        # Compute the full pairwise distance matrix between diagrams, refreshing
        # the shared row whenever the selection changes mid-computation
        # (entries not yet computed are reported as -1.0).
        print "Computing Distance Array"
        distance_generator = yieldDistance(distance_type)
        results = compute_pool.imap(
            distance_generator,
            itertools.product(itertools.izip(persistence_diagrams, d_rng),
                              itertools.izip(persistence_diagrams, d_rng)),
            max(1, d_len ** 2 / (10 * pool)))
        for (i, j, distance) in results:
            distances[i][j] = Distance(None, distance, None, None)
            if segment_compare.value != last:
                last = segment_compare.value
                for k in d_rng:
                    distance_array[k] = distances[last][
                        k].mean if distances[last][k] != None else -1.0
        if distance_type == 'bottleneck':
            filename = BottleneckDistances.get_distances_filename(config)
        elif distance_type == 'wasserstein':
            filename = WassersteinDistances.get_distances_filename(config)
        print "plot_persistence_distance.py: Writing %s" % (filename, )
        save_data(
            filename,
            Distances(config, distances,
                      [d.segment_info for d in persistence_diagrams]).toJSONDict())
    else:
        # Distance matrix was precomputed: copy it into the local matrix.
        for i in d_rng:
            for j in d_rng:
                distances[i][j] = ds.distances[i][j]
    last = -1
    compute_pool.close()
    compute_pool.join()
    # Serve the currently selected row forever, polling for selection changes.
    last = segment_compare.value
    for k in d_rng:
        distance_array[k] = distances[last][k].mean
    while True:
        if segment_compare.value != last:
            last = segment_compare.value
            for k in d_rng:
                distance_array[k] = distances[last][k].mean
        else:
            time.sleep(0.05)
# NOTE(review): fragment of a larger routine — in_obj, file_class,
# module_class, and get_filename are defined by enclosing code not visible
# here.  Dispatch on the class name carried in file_class to pick the
# canonical (uncompressed) output filename for this datatype.  Branch order
# matters: e.g. "AverageKernel" must be tested before the bare "Kernel"
# substring, and "Learning" catches "KernelLearning" before "Kernel" can.
if "Segments" in file_class or \
   "Post" in file_class :
    out_file = module_class.get_segment_filename(in_obj.config, gz=False)
elif "Features" in file_class:
    out_file = module_class.get_features_filename(in_obj.config, gz=False)
elif "PersistenceDiagrams" in file_class:
    out_file = module_class.get_persistence_diagrams_filename(
        in_obj.config, gz=False)
elif "Partition" in file_class:
    out_file = module_class.get_partition_filename(in_obj.config, gz=False)
elif "Learning" in file_class:
    out_file = module_class.get_learning_filename(in_obj.config, gz=False)
elif "Distances" in file_class or \
     "ScaleSpaceSimilarity" in file_class :
    out_file = module_class.get_distances_filename(in_obj.config, gz=False)
elif "AverageKernel" in file_class:
    out_file = get_filename(in_obj.config, [
        'max_simplices', 'persistence_epsilon', 'segment_filename',
        'segment_stride', 'segment_size', 'window_size', 'window_stride',
        'kernel_scale', 'kernel_gamma', 'invariant_epsilon', 'data_file',
        'data_index', 'label_index', 'persistence_degree', 'data_type',
        'post_process', 'post_process_arg'
    ], "AverageKernel")
elif "Kernel" in file_class:
    out_file = module_class.get_kernel_filename(in_obj.config, gz=False)
elif "CrossValidation" in file_class:
    out_file = module_class.get_cross_validation_filename(in_obj.config,
                                                          gz=False)
# NOTE(review): if no branch matches, out_file is unbound and the print below
# raises NameError — confirm file_class is always one of the known kinds.
print "Writing %s" % (out_file, )
save_data(out_file, in_obj.toJSONDict())
import os
import sys
import math
import itertools
import multiprocessing
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram

def avg(l) :
    """Arithmetic mean of l, returned as a float even for int input."""
    return sum(l, 0.0) / len(l)

def average_density(diagram) :
    """Mean local spacing of a diagram's degree-1 points.

    For each degree-1 point, averages its Euclidean distance to up to five
    other points (slice [1:6] of the in-order distance list), then averages
    those per-point values.  Returns 0.0 with fewer than three such points.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2 :
        diagram_distances = []
        for (x0, y0) in points :
            # BUG FIX: the second squared term previously reused (x0 - x1),
            # collapsing the Euclidean distance to sqrt(2)*|x0 - x1|; it must
            # use the y coordinates.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # NOTE(review): [1:6] keeps five entries in input order, not the
            # five nearest — confirm whether a sort was intended here.
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else :
        return 0.0

if __name__ == "__main__" :
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:] :
        pds = PersistenceDiagrams.fromJSONDict(load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))
import sys
import argparse
import importlib
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Segments import Segments, Segment

parser = argparse.ArgumentParser(description="Creates windowed segments of a single dimension for a multidimensioned dataset")
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--data-index", default=0, type=int)
parser.add_argument("-w", "--window-size", type=int)
parser.add_argument("-W", "--window-stride", default=1, type=int)
args = parser.parse_args(sys.argv[1:])

segments = Segments.fromJSONDict(load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
# Samples are interleaved per dimension; select the requested dimension.
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]

for segment in segments.segments:
    flat = segment.windows[0]
    rewindowed = []
    # Slide a window of window_size along the chosen dimension, stepping by
    # window_stride; windows that would run past the end are dropped.
    for start in range(0, orig_window_size, args.window_stride):
        if (start + args.window_size - 1) * dimensions + args.data_index < len(flat):
            rewindowed.append([flat[(start + j) * dimensions + args.data_index]
                               for j in range(args.window_size)])
    segment.data_index = segment.data_index[args.data_index]
    # NOTE(review): assigns the stride to window_size — confirm this is
    # intentional and matches what downstream consumers expect.
    segment.window_size = args.window_stride
    segment.windows = rewindowed

segment_module = importlib.import_module("persistence." + segments.config.data_type)
segment_class = getattr(segment_module, segments.config.data_type)
segment_filename = segment_class.get_segment_filename(segments.config)
print("Writing " + segment_filename)
save_data(segment_filename, segments.toJSONDict())