def get_mean_AA_freqs(file):
    """Return each column's share of the grand total of the site data.

    The lines of ``file`` are handed to ``af.get_transformed_data``; every
    element of every returned site is converted to ``float`` and the sites
    are stacked into a 2-D array with one row per site.

    NOTE(review): a second definition of ``get_mean_AA_freqs`` appears later
    in this file and replaces this one when the module is loaded.

    Args:
        file: Path of the input file to read.

    Returns:
        A list with one float per column: the column sum divided by the sum
        of all entries. Columns presumably correspond to amino acids --
        confirm against ``af.get_transformed_data``.

    Raises:
        ValueError: If the site data does not form a 2-D table (for example
            when it is empty).
        ZeroDivisionError: If the entries sum to zero.
    """
    # The context manager closes the file even if reading fails.
    with open(file, "r") as handle:
        protein_data = handle.readlines()

    site_data = af.get_transformed_data(protein_data)
    freq_array = np.array([[float(element) for element in site]
                           for site in site_data])
    if freq_array.ndim != 2:
        raise ValueError(
            "expected a 2-D table of site data, got shape %r"
            % (freq_array.shape,))

    # The denominator must be a single scalar. The builtin ``sum`` applied to
    # a 2-D array adds the rows together and yields one value per column,
    # which cannot be passed to ``float``; the array method is unambiguous.
    total_aa = float(freq_array.sum())
    return [float(column_sum) / total_aa
            for column_sum in freq_array.sum(axis=0)]
def get_mean_AA_freqs(file):
    """Compute, per column, the fraction of the overall total it accounts for.

    Reads ``file``, passes its lines to ``af.get_transformed_data``, converts
    every element of every site to ``float`` and arranges the sites as the
    rows of a 2-D array.

    NOTE(review): an earlier definition with the same name exists in this
    file; this later definition replaces it when the module is loaded.

    Args:
        file: Path of the input file to read.

    Returns:
        A list of floats, one per column, each equal to that column's sum
        divided by the sum over the whole array. Columns presumably map to
        amino acids -- confirm against ``af.get_transformed_data``.

    Raises:
        ValueError: If the site data is empty or otherwise not a 2-D table.
        ZeroDivisionError: If the whole array sums to zero.
    """
    # ``with`` guarantees the handle is released even when reading raises.
    with open(file, "r") as source:
        protein_data = source.readlines()

    rows = []
    for site in af.get_transformed_data(protein_data):
        rows.append([float(value) for value in site])
    freq_array = np.array(rows)

    if freq_array.ndim != 2:
        raise ValueError(
            "expected a 2-D table of site data, got shape %r"
            % (freq_array.shape,))

    # Use the array's own reductions: the builtin ``sum`` over a 2-D array
    # returns per-column sums rather than a scalar, so it cannot serve as the
    # grand total used for normalisation.
    grand_total = float(freq_array.sum())
    column_totals = freq_array.sum(axis=0)
    return [float(column_total) / grand_total
            for column_total in column_totals]
def get_position_count_data(file):
    """Return normalised column sums for buried and for surface sites.

    Each site from ``af.get_transformed_data`` is paired by position with the
    value at the same index in ``af.get_RSA_Values(file)``. Sites whose RSA
    value is strictly below 0.05 are treated as buried, all others as
    surface. (RSA is presumably relative solvent accessibility -- confirm
    against ``af.get_RSA_Values``.)

    NOTE(review): a later definition of ``get_position_count_data`` in this
    file replaces this one when the module is loaded, and it classifies
    ``RSA == 0.05`` as buried (``<=``) whereas this one uses ``<``.

    Args:
        file: Path of the input file; it is read here and also passed to
            ``af.get_RSA_Values``.

    Returns:
        A tuple ``(buried_counts, surface_counts)``. Each is a list with one
        float per column: the column sum over that class of sites divided by
        the sum of all entries in that class.

    Raises:
        ValueError: If either class ends up without sites (or its sites do
            not form a 2-D table).
        IndexError: If there are fewer RSA values than sites.
        ZeroDivisionError: If the entries of a class sum to zero.
    """
    # The context manager closes the file even if reading fails.
    with open(file, "r") as handle:
        protein_data = handle.readlines()

    RSA = af.get_RSA_Values(file)
    site_data = af.get_transformed_data(protein_data)

    buried_sites = []
    surface_sites = []
    # Index RSA explicitly (rather than zip) so that a shortage of RSA values
    # still surfaces as an IndexError instead of silently dropping sites.
    for i, site in enumerate(site_data):
        if float(RSA[i]) < 0.05:
            buried_sites.append(site)
        else:
            surface_sites.append(site)

    def column_fractions(sites, label):
        # Normalise each column sum by the total over all rows and columns.
        site_array = np.array(sites)
        if site_array.ndim != 2:
            raise ValueError(
                "expected a 2-D table of %s sites, got shape %r"
                % (label, site_array.shape))
        total = float(site_array.sum())
        return [float(column_sum) / total
                for column_sum in site_array.sum(axis=0)]

    # Build both tables before returning so a problem in either class is
    # reported, as before, rather than yielding a partial result.
    buried_counts = column_fractions(buried_sites, "buried")
    surface_counts = column_fractions(surface_sites, "surface")
    return buried_counts, surface_counts
def get_position_count_data(file):
    """Split sites into buried/surface and return their column fractions.

    Sites come from ``af.get_transformed_data`` applied to the lines of
    ``file``; the value at the matching index of ``af.get_RSA_Values(file)``
    decides the class. A site is buried when its RSA value is at most 0.05
    and surface otherwise. (RSA is presumably relative solvent accessibility
    -- confirm against ``af.get_RSA_Values``.)

    NOTE(review): an earlier definition with the same name exists in this
    file and is replaced by this one when the module is loaded; that earlier
    copy uses a strict ``<`` comparison, whereas this one uses ``<=``.

    Args:
        file: Path of the input file; it is read here and also passed to
            ``af.get_RSA_Values``.

    Returns:
        A tuple ``(buried_counts, surface_counts)``. Each element is a list
        holding one float per column: that column's sum within the class
        divided by the sum of every entry in the class.

    Raises:
        ValueError: If a class contains no sites (or its sites do not form a
            2-D table).
        IndexError: If there are fewer RSA values than sites.
        ZeroDivisionError: If the entries of a class sum to zero.
    """
    # ``with`` guarantees the handle is released even when reading raises.
    with open(file, "r") as source:
        protein_data = source.readlines()

    RSA = af.get_RSA_Values(file)

    # One bucket per class; the boolean test result selects the bucket.
    buckets = {True: [], False: []}
    # RSA is indexed explicitly so that running out of RSA values raises an
    # IndexError instead of quietly ignoring the remaining sites.
    for position, site in enumerate(af.get_transformed_data(protein_data)):
        buckets[float(RSA[position]) <= 0.05].append(site)

    def normalised_columns(sites, label):
        # Column sums divided by the sum over the whole table.
        table = np.array(sites)
        if table.ndim != 2:
            raise ValueError(
                "expected a 2-D table of %s sites, got shape %r"
                % (label, table.shape))
        grand_total = float(table.sum())
        return [float(column_total) / grand_total
                for column_total in table.sum(axis=0)]

    buried_counts = normalised_columns(buckets[True], "buried")
    surface_counts = normalised_columns(buckets[False], "surface")
    return buried_counts, surface_counts