Пример #1
0
    # Per-species setup: restrict to unique samples, load the pre-computed
    # substitution rates, and build the '4D' and '1D' difference/opportunity
    # matrices before the pairwise pass below.
    # NOTE(review): the `continue` statements imply an enclosing loop (most
    # likely over species) that is not visible in this excerpt.
    sys.stderr.write("Calculating unique samples...\n")
    # Only consider one sample per person
    # The result of calculate_unique_samples is used to index snp_samples
    # (presumably a boolean mask or index array over a numpy array -- confirm).
    snp_samples = snp_samples[parse_midas_data.calculate_unique_samples(
        subject_sample_map, sample_list=snp_samples)]

    # Skip this iteration when fewer than min_sample_size samples remain.
    if len(snp_samples) < min_sample_size:
        sys.stderr.write("Not enough unique samples!\n")
        continue

    # Load divergence matrices
    sys.stderr.write("Loading pre-computed substitution rates for %s...\n" %
                     species_name)
    substitution_rate_map = calculate_substitution_rates.load_substitution_rate_map(
        species_name)
    sys.stderr.write("Calculating matrices...\n")
    # Same substitution_rate_map, two site classes: '4D' feeds the syn_*
    # matrices and '1D' feeds the non_* matrices (presumably synonymous vs.
    # non-synonymous sites -- confirm against calculate_substitution_rates).
    dummy_samples, syn_difference_matrix, syn_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map(
        substitution_rate_map, '4D', allowed_samples=snp_samples)
    dummy_samples, non_difference_matrix, non_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map(
        substitution_rate_map, '1D', allowed_samples=snp_samples)
    # dummy_samples now holds the sample list returned by the second ('1D')
    # call; adopt it as the working sample list.
    snp_samples = dummy_samples

    # Start empty per-species accumulators for the synonymous quantities...
    syn_differences[species_name] = []
    syn_pseudocounts[species_name] = []
    syn_opportunities[species_name] = []

    # ...and for the non-synonymous quantities.
    non_differences[species_name] = []
    non_pseudocounts[species_name] = []
    non_opportunities[species_name] = []

    # Visit each unordered pair (i, j) with i < j along the first axis of the
    # synonymous difference matrix.
    # NOTE(review): the inner loop body is not present in this excerpt.
    for i in xrange(0, syn_difference_matrix.shape[0]):
        for j in xrange(i + 1, syn_difference_matrix.shape[0]):
    # Keep only the haploid samples whose country entry is 'United Kingdom'.
    snp_samples = []
    for sample_name in haploid_samples:
        if sample_country_map[sample_name] == 'United Kingdom':
            snp_samples.append(sample_name)

    # Skip this iteration when fewer than 10 samples were selected.
    # NOTE(review): `continue` implies an enclosing loop that is not visible
    # in this excerpt.
    if len(snp_samples) < 10:
        sys.stderr.write("Not enough unique samples!\n")
        continue

    # Load divergence matrices
    sys.stderr.write("Loading pre-computed substitution rates for %s...\n" %
                     species_name)
    substitution_rate_map = calculate_substitution_rates.load_substitution_rate_map(
        species_name)
    sys.stderr.write("Calculating matrix...\n")
    # Build the difference/opportunity matrices for the 'core' category,
    # restricted to the selected samples.
    dummy_samples, snp_difference_matrix, snp_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map(
        substitution_rate_map, 'core', allowed_samples=snp_samples)
    # Adopt the sample list returned alongside the matrices.
    snp_samples = dummy_samples

    sys.stderr.write("Done!\n")

    # Differences divided by opportunities. Adding (opportunities == 0) to the
    # denominator turns every zero opportunity into 1, so those entries divide
    # by 1 instead of by 0 (assumes numpy-style elementwise arithmetic --
    # confirm the matrices are numpy arrays).
    snp_substitution_matrix = snp_difference_matrix * 1.0 / (
        snp_opportunity_matrix + (snp_opportunity_matrix == 0))

    closest_snp_substitution_rates = []
    pair_snp_substitution_rates = []
    # For every row index i, scan all column indices j, starting from a large
    # sentinel value for the running minimum.
    # NOTE(review): the inner loop body is not present in this excerpt.
    for i in xrange(0, snp_opportunity_matrix.shape[0]):

        min_substitution_rate = 1e09

        for j in xrange(0, snp_opportunity_matrix.shape[0]):
Пример #3
0
    # Build SNV mutation/reversion matrices, gene gain/loss matrices, and
    # per-variant-type matrices from an already-loaded substitution_rate_map.
    sys.stderr.write("Calculating SNV matrix...\n")
    # 'all' category: separate difference and opportunity matrices for
    # mutations and reversions.
    dummy_samples, snp_mut_difference_matrix, snp_rev_difference_matrix, snp_mut_opportunity_matrix, snp_rev_opportunity_matrix = calculate_substitution_rates.calculate_mutrev_matrices_from_substitution_rate_map(substitution_rate_map, 'all', allowed_samples=snp_samples)
    # Adopt the sample list returned alongside the SNV matrices.
    snp_samples = dummy_samples
    
    # 'genes' category through the same helper; the results are unpacked as
    # loss first, then gain.
    gene_samples, gene_loss_difference_matrix, gene_gain_difference_matrix, gene_loss_opportunity_matrix, gene_gain_opportunity_matrix = calculate_substitution_rates.calculate_mutrev_matrices_from_substitution_rate_map(substitution_rate_map, 'genes', allowed_samples=snp_samples)
    
    gene_difference_matrices = {'gains': gene_gain_difference_matrix, 'losses': gene_loss_difference_matrix}
    # Only the loss opportunity matrix is kept as the gene opportunity matrix;
    # gene_gain_opportunity_matrix is not used in the lines shown here.
    gene_opportunity_matrix = gene_loss_opportunity_matrix
    
    # Matrices keyed by variant type (plus 'muts' / 'revs' added below).
    opportunity_matrices = {}
    difference_matrices = {}

    
    for var_type in variant_types:
        
        # The returned sample list is discarded here (dummy_samples is simply
        # overwritten on each iteration).
        dummy_samples, difference_matrix, opportunity_matrix =    calculate_substitution_rates.calculate_matrices_from_substitution_rate_map(substitution_rate_map, var_type, allowed_samples=snp_samples)
    
        difference_matrices[var_type] = difference_matrix
        opportunity_matrices[var_type] = opportunity_matrix

    # File the mutation / reversion matrices under their own keys as well.
    difference_matrices['muts'] = snp_mut_difference_matrix
    difference_matrices['revs'] = snp_rev_difference_matrix
    opportunity_matrices['muts'] = snp_mut_opportunity_matrix
    opportunity_matrices['revs'] = snp_rev_opportunity_matrix
    
    # Combined SNV matrices: mutations plus reversions.
    snp_difference_matrix = snp_mut_difference_matrix+snp_rev_difference_matrix
    snp_opportunity_matrix = snp_mut_opportunity_matrix+snp_rev_opportunity_matrix
    
    # Combined gene-change matrix: gains plus losses.
    gene_difference_matrix = gene_gain_difference_matrix + gene_loss_difference_matrix
        
    # Differences divided by opportunities. Adding (opportunities == 0) to the
    # denominator turns every zero opportunity into 1, so those entries divide
    # by 1 instead of by 0 (assumes numpy-style elementwise arithmetic --
    # confirm the matrices are numpy arrays).
    snp_substitution_rate =     snp_difference_matrix*1.0/(snp_opportunity_matrix+(snp_opportunity_matrix==0))