def clade_frequency_correlations_func_polarizer(params):
    """Tabulate polarizer values against clade frequencies for internal nodes.

    ``params.diffusion`` and ``params.gamma`` are overwritten with 1.0 on the
    object passed in, so the caller sees that change.  A 'polarizer'
    prediction is built and evaluated, a combined tree is constructed from
    the prediction and the test data for the window June 1 of ``params.year``
    to June 1 of the following year, and the prediction tree is matched
    against the combined tree.

    Returns the ``np.hstack`` of two arrays built over
    ``prediction.non_terminals``: the polarizers computed for every memory
    scale in the module-level ``mem_scale`` (transposed, so one column per
    memory scale) and the ``temporal_frequency`` of each node's
    ``mirror_node``.
    """
    # the polarizer analysis always runs with these two parameters pinned
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # evaluate the polarizer on terminal ('_ext') and internal ('_int') nodes
    methods = [('polarizer', suffix, nodes) for suffix, nodes in (
        ('_ext', prediction.terminals),
        ('_int', prediction.non_terminals))]
    _distances, _distances_epi, held_out = test_flu.evaluate(
        prediction, methods, params)

    # evaluation window: June 1 of the prediction year to June 1 a year later
    first_year = params.year
    eval_window = [date(year=first_year, month=6, day=1),
                   date(year=first_year + 1, month=6, day=1)]
    merged_data = test_flu.make_combined_data(
        prediction, held_out, collapse=params.collapse)
    merged_tree = flu.flu_ranking(
        merged_data, time_bins=eval_window, pseudo_count=0)
    merged_tree.expansion_score()

    # presumably this attaches ``mirror_node`` to the prediction tree's
    # internal nodes -- it is read right below; confirm in tree_utils
    tree_utils.find_internal_nodes(prediction.T, merged_tree.T)
    observed = [n.mirror_node.temporal_frequency
                for n in prediction.non_terminals]

    def polarizers_for(tau):
        # recompute the polarizers with this memory scale, then read them off
        prediction.calculate_polarizers(mem=tau)
        return [n.polarizer for n in prediction.non_terminals]

    per_tau = [polarizers_for(tau) for tau in mem_scale]

    polarizer_columns = np.array(per_tau).T
    frequency_columns = np.array(observed)
    return np.hstack((polarizer_columns, frequency_columns))
def clade_frequency_correlations_func(params):
    """Tabulate fitness, polarizer and clade frequencies for internal nodes.

    Builds a 'mean_fitness' prediction from ``params``, evaluates it, and
    constructs a combined tree from the prediction and the test data for the
    window June 1 of ``params.year`` to June 1 of the following year.  The
    prediction tree is then matched against the combined tree.

    Returns a numpy array with one row per node in
    ``prediction.non_terminals``: ``mean_fitness``, ``polarizer``, followed
    by the entries of the ``temporal_frequency`` of the node's
    ``mirror_node``.
    """
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # evaluate mean fitness on terminal ('_ext') and internal ('_int') nodes
    methods = [('mean_fitness', suffix, nodes) for suffix, nodes in (
        ('_ext', prediction.terminals),
        ('_int', prediction.non_terminals))]
    _distances, _distances_epi, held_out = test_flu.evaluate(
        prediction, methods, params)

    # evaluation window: June 1 of the prediction year to June 1 a year later
    first_year = params.year
    eval_window = [date(year=first_year, month=6, day=1),
                   date(year=first_year + 1, month=6, day=1)]
    merged_data = test_flu.make_combined_data(
        prediction, held_out, collapse=params.collapse)
    merged_tree = flu.flu_ranking(
        merged_data, time_bins=eval_window, pseudo_count=0)
    merged_tree.expansion_score()

    # presumably this attaches ``mirror_node`` to the prediction tree's
    # internal nodes -- it is read right below; confirm in tree_utils
    tree_utils.find_internal_nodes(prediction.T, merged_tree.T)

    rows = [
        [n.mean_fitness, n.polarizer] + list(n.mirror_node.temporal_frequency)
        for n in prediction.non_terminals
    ]
    return np.array(rows)
def clade_frequency_correlations_func(params):
    """Return per-internal-node fitness, polarizer and frequency data.

    A 'mean_fitness' prediction is set up from ``params`` and evaluated.  The
    prediction and the resulting test data are merged into a combined tree
    covering June 1 of ``params.year`` through June 1 of the next year, and
    the prediction tree is matched against that combined tree.

    The result is a numpy array; each row belongs to one node of
    ``prediction.non_terminals`` and holds ``mean_fitness``, ``polarizer``
    and then the values of ``mirror_node.temporal_frequency``.
    """
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # methods to evaluate: mean fitness on terminal and on internal nodes
    external = ('mean_fitness', '_ext', prediction.terminals)
    internal = ('mean_fitness', '_int', prediction.non_terminals)
    methods = [external, internal]
    _dist, _dist_epi, future_data = test_flu.evaluate(prediction, methods,
                                                      params)

    # two time-bin edges, one year apart, both on June 1
    window_start = date(year=params.year, month=6, day=1)
    window_end = date(year=params.year + 1, month=6, day=1)
    joint_data = test_flu.make_combined_data(prediction, future_data,
                                             collapse=params.collapse)
    joint_tree = flu.flu_ranking(joint_data,
                                 time_bins=[window_start, window_end],
                                 pseudo_count=0)
    joint_tree.expansion_score()

    # NOTE(review): ``mirror_node`` used below is presumably set by this call
    tree_utils.find_internal_nodes(prediction.T, joint_tree.T)

    table = []
    for clade in prediction.non_terminals:
        predictors = [clade.mean_fitness, clade.polarizer]
        frequencies = list(clade.mirror_node.temporal_frequency)
        table.append(predictors + frequencies)

    return np.array(table)
def clade_frequency_correlations_func_polarizer(params):
    """Return polarizers at several memory scales next to clade frequencies.

    Sets ``params.diffusion`` and ``params.gamma`` to 1.0 in place, sets up
    and evaluates a 'polarizer' prediction, and merges prediction and test
    data into a combined tree covering June 1 of ``params.year`` through
    June 1 of the next year.  The prediction tree is matched against the
    combined tree.

    For every value in the module-level ``mem_scale`` the polarizers are
    recomputed and read from ``prediction.non_terminals``.  The returned
    array is ``np.hstack`` of those polarizers (transposed) and the
    ``mirror_node.temporal_frequency`` values of the same nodes.
    """
    # fixed settings for the polarizer run; note this mutates ``params``
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # methods to evaluate: polarizer on terminal and on internal nodes
    external = ('polarizer', '_ext', prediction.terminals)
    internal = ('polarizer', '_int', prediction.non_terminals)
    methods = [external, internal]
    _dist, _dist_epi, future_data = test_flu.evaluate(prediction, methods,
                                                      params)

    # two time-bin edges, one year apart, both on June 1
    window_start = date(year=params.year, month=6, day=1)
    window_end = date(year=params.year + 1, month=6, day=1)
    joint_data = test_flu.make_combined_data(prediction, future_data,
                                             collapse=params.collapse)
    joint_tree = flu.flu_ranking(joint_data,
                                 time_bins=[window_start, window_end],
                                 pseudo_count=0)
    joint_tree.expansion_score()

    # NOTE(review): ``mirror_node`` used below is presumably set by this call
    tree_utils.find_internal_nodes(prediction.T, joint_tree.T)
    frequencies = []
    for clade in prediction.non_terminals:
        frequencies.append(clade.mirror_node.temporal_frequency)

    def read_polarizers(memory):
        # polarizers depend on the memory scale, so recompute before reading
        prediction.calculate_polarizers(mem=memory)
        return [clade.polarizer for clade in prediction.non_terminals]

    by_memory = [read_polarizers(tau) for tau in mem_scale]

    left = np.array(by_memory).T
    right = np.array(frequencies)
    return np.hstack((left, right))
# Example #5
0
    # NOTE(review): this is the body of an enclosing block whose header is not
    # visible here; ``ii``, ``params``, ``nuc_dist_array``, ``epi_dist_array``,
    # ``top_strains`` and ``top_strain_method`` are defined outside this
    # fragment.

    # set up the prediction with four named methods plus the two combined
    # ranking callables from ``flu``
    prediction = test_flu.predict_params([
        'mean_fitness', 'expansion_score', 'depth', 'polarizer',
        flu.combined_ranking_internal, flu.combined_ranking_external
    ], params)

    # define the methods for which the predictions are to be evaluated; each
    # entry is (method name, name suffix, nodes to evaluate on)
    methods = [('mean_fitness', '_ext', prediction.terminals),
               ('mean_fitness', '_int', prediction.non_terminals),
               ('expansion_score', '_int', prediction.non_terminals),
               ('expansion_fitness', '', prediction.non_terminals),
               ('time_fitness', '', prediction.terminals),
               ('ladder_rank', '', prediction.terminals),
               ('date', '', prediction.terminals),
               ('polarizer', '_ext', prediction.terminals),
               ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, methods, params)
    # row ``ii`` of each result array: the 'average', 'minimal' and 'L&L'
    # entries first, then one entry per method, looked up as name + suffix
    nuc_dist_array[ii,:] = [distances['average'],distances['minimal'],distances['L&L']]\
                    +[distances[m[0]+m[1]] for m in methods]
    epi_dist_array[ii,:] = [distances_epi['average'],distances_epi['minimal'],distances_epi['L&L']]\
                    +[distances_epi[m[0]+m[1]] for m in methods]
    # memorize the strain predicted best among the terminal nodes according
    # to ``top_strain_method``
    top_strains.append(
        prediction.best_node(method=top_strain_method,
                             nodes=prediction.terminals))

# Output table for this run: <analysis_folder> + <fname_base>_<name_mod>_nuc.dat
fname_nuc = analysis_folder + '_'.join([fname_base, name_mod, 'nuc.dat'])
# Only a missing file gets a header line; an existing file is left untouched.
if not os.path.isfile(fname_nuc):
    with open(fname_nuc, 'w') as outfile:
        # one column per evaluated method, labelled method name + suffix
        method_columns = [m[0] + m[1] for m in methods]
        header = '#average\tminimal\tL&L\t' + '\t'.join(method_columns) + '\n'
        outfile.write(header)
# Script: compute the change in polarizer along every branch below an internal
# node and record it together with mutation counts on that branch.

# command line: the standard flu options plus the polarizer memory time scale
parser = test_flu.make_flu_parser()
parser.add_argument('--tau', default=1.0, type=float,
                    help='memory time scale of the tree polarizer')
params = parser.parse_args()
# get name snippets to link output files to run parameters
base_name, name_mod = test_flu.get_fname(params)
# this analysis always runs with gamma and diffusion fixed to 1.0
params.gamma = 1.0
params.diffusion = 1.0

# set up the prediction and pass all parameters to the wrapper function
prediction = test_flu.predict_params(['polarizer'], params)
prediction.calculate_polarizers(params.tau)

# define the methods for which the predictions are to be evaluated
methods = [('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(prediction, methods, params)

# branches at least this long are left out of the statistics
MAX_BRANCH_LENGTH = 0.01

# calculate the fitness differentials for each internal branch and associate with
# different types of mutations that happen on these branches.
# Each record is: (polarizer change, child is terminal, number of mutations,
# number of amino acid mutations, number of those whose first element is in
# flu.HA1_antigenic_sites, number of those whose first element is in
# flu.cluster_positions)
dfit = []
for node in prediction.non_terminals:
    for child in node.clades:
        delta_fitness = child.polarizer - node.polarizer
        muts = child.mutations
        aa_muts = child.aa_mutations
        if child.branch_length < MAX_BRANCH_LENGTH:
            n_antigenic = len([m[0] for m in aa_muts
                               if m[0] in flu.HA1_antigenic_sites])
            n_cluster = len([m[0] for m in aa_muts
                             if m[0] in flu.cluster_positions])
            dfit.append((delta_fitness, child.is_terminal(), len(muts),
                         len(aa_muts), n_antigenic, n_cluster))
        else:
            # single-argument call form: prints the same text under Python 2
            # and Python 3 (the former print statement was Python-2-only)
            print('%s excluded due to long branch length' % (child,))