parser.add_argument(
    '--exclude',
    help="a text file containing strains (one per line) that will be excluded")
args = parser.parse_args()

region = args.region

# Sorted so that time_interval[0] is the lower and time_interval[1] the upper
# bound of the window used for filtering below.
time_interval = sorted([
    numeric_date(x)
    for x in determine_time_interval(args.time_interval, args.resolution)
])

# read strains to exclude
excluded_strains = read_strain_list(args.exclude) if args.exclude else []

# read in meta data, parse numeric dates, and exclude outlier strains.
# The result of parse_metadata is indexed by segment name first (see the
# ['segment'] lookup), so the strain filter has to be applied to the
# per-segment table; filtering the top-level dict would only compare segment
# names against the exclusion list and never drop a strain.
# NOTE(review): assumes the per-segment table is a dict keyed by strain name.
segment_metadata = parse_metadata(['segment'], [args.metadata])['segment']
metadata = {
    strain: record
    for strain, record in segment_metadata.items()
    if strain not in excluded_strains
}

print(time_interval)

# keep the records whose numeric date falls in the half-open window
# [time_interval[0], time_interval[1])
sequences = [
    record for record in metadata.values()
    if time_interval[0] <= record["num_date"] < time_interval[1]
]

age_distribution(sequences, args.output)
return scores if __name__ == '__main__': parser = argparse.ArgumentParser( description="Annotate nodes with scores based on metadata", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument('--metadata', type=str, required=True, help="file with metadata associated with viral sequences, one for each segment") parser.add_argument('--tree', type=str, required=True, help="file inferred tree") parser.add_argument('--output', help="name of the file to write selected strains to") args = parser.parse_args() # read in meta data and tree metadata = parse_metadata(['s'], [args.metadata])['s'] T = Phylo.read(args.tree, 'newick') # dictionary to hold calculated scores for terminal and internal nodes scores = dict() for k,v in calculate_average_age(T, metadata, min_clade_size=20).items(): if k not in scores: scores[k] = {} scores[k]['avg_age'] = v for k,v in calc_average_vaccination_coverage(T, metadata, min_clade_size=0).items(): if k not in scores: scores[k] = {} scores[k]['vaccov'] = v with open(args.output, 'w') as results: json.dump({"nodes":scores}, results, indent=1, sort_keys = True)