def test():
    """Run the length experiment on the models in ``../test_trees_128``.

    The reference models are loaded once, distances are computed for ten
    logarithmically spaced lengths between 1e2 and 1e6 with a pool of four
    worker processes, and the outcome is passed to ``plot_results``.
    """
    in_directory = "../test_trees_128"
    out_directory = "../test_128"
    image_directory = "../images/128"

    with multiprocessing.Pool(processes=4) as pool:
        # Convert the tree files first so that they can be loaded as models.
        parse_trees_to_json.parse_trees(in_directory)
        vlmcs = VLMC.from_json_dir(in_directory)

        # np.logspace yields floats; the experiment works with integer lengths.
        lengths = [int(value) for value in np.logspace(2, 6, 10)]
        print(lengths)

        distances = calculate_distances_for_lengths(
            vlmcs, lengths, out_directory, image_directory, pool
        )
        plot_results(vlmcs, distances, lengths, image_directory)
def distance_for_length(length, vlmcs, out_directory, image_directory, d):
    """Return the distances for one length, averaged over five repetitions.

    Each repetition calls ``train`` with the given models and length, reloads
    the models written to ``out_directory``, pairs them with the originals via
    ``pair_vlmcs`` and measures each pair with ``distance_calculation``.

    Args:
        length: Length forwarded to ``train``.
        vlmcs: The reference models.
        out_directory: Directory ``train`` writes to and models are read from.
        image_directory: Only referenced by the disabled plotting call below.
        d: Distance object forwarded to ``distance_calculation``.

    Returns:
        A numpy array with one averaged distance per entry of ``vlmcs``.
    """
    repetitions = 5
    mean_distances = np.zeros(len(vlmcs))

    for _ in range(repetitions):
        train(vlmcs, length, out_directory)
        parse_trees_to_json.parse_trees(out_directory)
        retrained = VLMC.from_json_dir(out_directory)

        matched = pair_vlmcs(vlmcs, retrained)
        # plot_vlmcs(matched, image_directory)

        # Adding the pre-scaled contribution keeps a running mean.
        mean_distances += distance_calculation(matched, d) / repetitions

    print("{} done".format(length))
    return mean_distances
def test(args):
    """Cluster the models found in ``args.directory`` and plot the metrics.

    Args:
        args: Parsed command-line arguments. This function reads
            ``directory``, ``out_directory`` and ``name``; the object is also
            handed to ``parse_clustering_method`` and
            ``parse_distance_method``.
    """
    tree_directory = args.directory
    out_directory = args.out_directory

    parse_trees_to_json.parse_trees(tree_directory)
    vlmcs = VLMC.from_json_dir(tree_directory)

    cluster_class = parse_clustering_method(args)
    d = parse_distance_method(args)

    if args.name:
        name = args.name
    else:
        # cluster_class is a class while d is an instance, hence the
        # different ways of obtaining the type name.
        name = cluster_class.__name__ + ", " + d.__class__.__name__

    metrics = test_clustering(d, vlmcs, cluster_class)

    # Create the output directory if needed. Unlike the previous
    # stat-then-mkdir guarded by a bare ``except``, this does not swallow
    # unrelated exceptions and is not racy.
    os.makedirs(out_directory, exist_ok=True)

    plot_metrics(metrics, out_directory, name)
def test_distance_function(d, tree_dir, out_dir, plot_distances=False, plot_boxes=False):
    """Load the models in ``tree_dir`` and evaluate the distance function ``d``.

    If a sibling directory named ``tree_dir + "_test"`` exists, its models are
    used as the test set; otherwise the models from ``tree_dir`` are tested
    against themselves.

    Args:
        d: Distance object forwarded to ``test_distance_function_``.
        tree_dir: Directory containing the trees to load.
        out_dir: Output directory, created if missing. ``None`` skips the
            directory creation; the value is forwarded unchanged.
        plot_distances: Forwarded to ``test_distance_function_``.
        plot_boxes: Forwarded to ``test_distance_function_``.

    Returns:
        Whatever ``test_distance_function_`` returns.
    """
    parse_trees_to_json.parse_trees(tree_dir)
    vlmcs = VLMC.from_json_dir(tree_dir)
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    test_dir = tree_dir + "_test"
    if os.path.isdir(test_dir):
        parse_trees_to_json.parse_trees(test_dir)
        test_vlmcs = VLMC.from_json_dir(test_dir)
    else:
        test_vlmcs = vlmcs

    if out_dir is not None:
        # Replaces a stat-then-mkdir guarded by a bare ``except``, which hid
        # unrelated errors and was racy.
        os.makedirs(out_dir, exist_ok=True)

    # The meaning of the two positional flags is defined by
    # test_distance_function_, which is not part of this block.
    return test_distance_function_(d, vlmcs, test_vlmcs, metadata, out_dir,
                                   True, False, plot_distances, plot_boxes)
parser.add_argument('--intersection', action='store_true')
parser.add_argument('--occurrence-probability-labels', action='store_true')
parser.add_argument(
    '--directory', type=str, default='../trees_pst_better',
    help='The directory which contains the vlmcs to be printed.')
parser.add_argument(
    '--out-directory', type=str, default='../images',
    help='The directory to where the images should be written.')

args = parser.parse_args()

# Make sure the output directory exists. This replaces a stat-then-mkdir
# guarded by a bare ``except``, which swallowed unrelated exceptions.
os.makedirs(args.out_directory, exist_ok=True)

parse_trees(args.directory, args.deltas)
vlmcs = VLMC.from_json_dir(args.directory)
metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

# --intersection selects the intersection output instead of the regular one.
if args.intersection:
    save_intersection(vlmcs, metadata, args.out_directory)
else:
    save(vlmcs, metadata, args.out_directory, args.deltas,
         args.occurrence_probability_labels)
def number_in_rank(metadata, key):
    """Print how many metadata entries share each value of ``key``.

    Args:
        metadata: Mapping whose values are mappings that contain ``key``.
        key: The field to count, e.g. a taxonomic rank name.
    """
    rank_counts = Counter(entry[key] for entry in metadata.values())
    summary = "{}: size {}\n{}".format(key, len(rank_counts), rank_counts)
    print(summary)


def order_analysis(vlmcs):
    """Print the distribution, minimum, maximum and mean of the model orders.

    Args:
        vlmcs: Iterable of objects exposing an ``order`` attribute.
    """
    orders = [model.order for model in vlmcs]
    statistics = (
        Counter(orders),
        np.min(orders),
        np.max(orders),
        np.mean(orders),
    )
    print("Orders: {}, min: {}, max: {}, average: {}".format(*statistics))


if __name__ == '__main__':
    tree_dir = '../trees_virus_martin_all_96'
    # tree_dir = '../trees_more_192'

    parse_trees_to_json.parse_trees(tree_dir)
    vlmcs = VLMC.from_json_dir(tree_dir)
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    # Optional filtering, currently disabled:
    # vlmcs = [v for v in vlmcs if metadata[v.name]['genus'] == 'Ebolavirus']
    # metadata = {k: v for k, v in metadata.items() if v['genus'] == 'Ebolavirus'}
    # vlmcs = {metadata[v.name]['species']: v for v in vlmcs}
    # vlmcs = [v for _, v in vlmcs.items()]

    # Recomputed so the metadata matches the models if the filtering above
    # is enabled.
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    number_in_ranks(metadata)
    # order_analysis(vlmcs)
def get_vlmcs(out_directory, number_of_parameters):
    """Load the models stored under ``out_directory/<number_of_parameters>``.

    Args:
        out_directory: Base directory holding one sub-directory per setting.
        number_of_parameters: Name of the sub-directory (converted with
            ``str``).

    Returns:
        The result of ``VLMC.from_json_dir`` for that sub-directory.
    """
    parameter_dir = os.path.join(out_directory, str(number_of_parameters))
    # The trees are converted before the models are read back.
    parse_trees(parameter_dir)
    models = VLMC.from_json_dir(parameter_dir)
    return models
def example_distance(tree_dir, image_dir):
    """Run the distance test on ``tree_dir`` using the Frobenius norm.

    Every model is tested against the full set itself. Since no real taxonomy
    is looked up here, each model's own name is used for its species, family
    and genus.

    Args:
        tree_dir: Directory containing the trees to load.
        image_dir: Directory forwarded to ``test_distance_function_``.
    """
    parse_trees_to_json.parse_trees(tree_dir)
    models = VLMC.from_json_dir(tree_dir)
    norm = FrobeniusNorm()

    ranks = ['species', 'family', 'genus']
    metadata = {}
    for model in models:
        metadata[model.name] = dict.fromkeys(ranks, model.name)

    test_distance_function_(norm, models, models, metadata, image_dir)
def parse_trees(args):
    """Convert the trees in ``args.directory`` and return the loaded models.

    Args:
        args: Object exposing a ``directory`` attribute.

    Returns:
        The result of ``VLMC.from_json_dir`` for that directory.
    """
    source_dir = args.directory
    parse_trees_to_json.parse_trees(source_dir)
    loaded = VLMC.from_json_dir(source_dir)
    return loaded