parser.add_argument(
        '--exclude',
        help=
        "a text file containing strains (one per line) that will be excluded")

    args = parser.parse_args()

    region = args.region
    # Sorting guarantees that index 0 is the lower bound and index 1 the upper
    # bound of the interval, whatever order the dates are produced in.
    time_interval = sorted(
        numeric_date(x)
        for x in determine_time_interval(args.time_interval, args.resolution)
    )
    # read strains to exclude; a set keeps the membership test below cheap
    # (assumes strain names are hashable, i.e. plain strings -- TODO confirm)
    excluded_strains = set(read_strain_list(args.exclude)) if args.exclude else set()

    # read in meta data and exclude outlier strains.
    # parse_metadata returns a dict indexed by the segment label passed in, so
    # the per-segment dict has to be selected *before* filtering. Filtering the
    # outer dict would compare the label 'segment' against the exclusion list
    # and never drop a strain.
    segment_metadata = parse_metadata(['segment'], [args.metadata])['segment']
    metadata = {
        strain: record
        for strain, record in segment_metadata.items()
        if strain not in excluded_strains
    }

    print(time_interval)
    # keep the records whose numeric date lies in the half-open interval
    # [time_interval[0], time_interval[1])
    sequences = [
        record
        for record in metadata.values()
        if time_interval[0] <= record["num_date"] < time_interval[1]
    ]

    age_distribution(sequences, args.output)
# Example #2
0
    return scores

if __name__ == '__main__':
    # Command-line entry point: read metadata and a newick tree, compute
    # per-node scores, and write them to a JSON file under the key "nodes".
    parser = argparse.ArgumentParser(
        description="Annotate nodes with scores based on metadata",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('--metadata', type=str, required=True, help="file with metadata associated with viral sequences, one for each segment")
    parser.add_argument('--tree', type=str, required=True, help="file inferred tree")
    # --output is opened unconditionally at the end of the script, so it is
    # required up front; otherwise a missing value only surfaces as a
    # TypeError after all scores have been computed.
    parser.add_argument('--output', required=True, help="name of the JSON file to write node scores to")

    args = parser.parse_args()

    # read in meta data and tree
    metadata = parse_metadata(['s'], [args.metadata])['s']
    T = Phylo.read(args.tree, 'newick')

    # dictionary to hold calculated scores for terminal and internal nodes;
    # each key maps to a dict that collects the individual score types
    scores = {}
    for node, avg_age in calculate_average_age(T, metadata, min_clade_size=20).items():
        scores.setdefault(node, {})['avg_age'] = avg_age

    for node, coverage in calc_average_vaccination_coverage(T, metadata, min_clade_size=0).items():
        scores.setdefault(node, {})['vaccov'] = coverage

    with open(args.output, 'w') as results:
        json.dump({"nodes": scores}, results, indent=1, sort_keys=True)