Пример #1
0
def Run_Uverskey(Fasta1, Fasta2, OutFile):
    """Save a mean-hydropathy vs. net-absolute-charge scatter plot of two FASTA sets.

    Both sequence sets are converted with the same two-feature set
    (``mean_hydropathy``, ``net_abs_charge``) and drawn as point clouds
    together with a fixed straight reference line.

    Args:
        Fasta1: path of the FASTA file plotted under the label "Amyload".
        Fasta2: path of the FASTA file plotted under the label "Disprot".
        OutFile: path the figure is saved to.

    The plot is drawn on a dedicated figure that is always closed afterwards,
    so repeated calls (or calls to other plotting helpers in the same process)
    neither overlay each other's curves nor accumulate open figures.
    """
    amyload_seq = load_fasta_file(Fasta1)
    disprot_seq = load_fasta_file(Fasta2)

    # default=0: letters missing from the mappings get neutral charge and
    # hydropathy instead of being rejected.
    net_abs_charge = Feature(get_aa2charge(default=0)).then(average_absolute)
    mean_hydropathy = Feature(get_aa2hydropathy(default=0)).then(average)

    # The order of the features fixes the column order used below.
    uversky_fs = FeatureSet("uversky")
    uversky_fs.add(mean_hydropathy, name="mean_hydropathy")
    uversky_fs.add(net_abs_charge, name="net_abs_charge")

    amyload_uversky_seq = uversky_fs(amyload_seq)
    disprot_uversky_seq = uversky_fs(disprot_seq)

    # Two ways of extracting the (hydropathy, charge) pairs: by feature name...
    amyload_data_x = amyload_uversky_seq.columns(feature="mean_hydropathy")[0]
    amyload_data_y = amyload_uversky_seq.columns(feature="net_abs_charge")[0]
    # ...or by compacting the set and taking its columns in feature order.
    disprot_data = compact(disprot_uversky_seq).columns()

    # All data is ready before the figure is created, so a failure above
    # never leaves a half-built figure behind.
    fig = plt.figure()
    try:
        plt.plot(amyload_data_x, amyload_data_y, '.', label="Amyload")
        plt.plot(disprot_data[0], disprot_data[1], '.', label="Disprot")

        # Fixed reference line; presumably the Uversky order/disorder
        # boundary - TODO confirm the endpoints against the literature.
        plt.plot([-0.78, 0.835], [0.0, 0.5], 'k')
        plt.xlabel("mean hydrophobicity")
        plt.ylabel("net abs charge")
        plt.legend()

        fig.savefig(OutFile)
    finally:
        plt.close(fig)
Пример #2
0
def Run_ngrams(fasta1, fasta2, OutFile):
    """Fit Zipf's law to trigram counts and save a log-log rank/count plot.

    Args:
        fasta1: path of the FASTA file used for the pattern and bigram
            computations (their results are currently not plotted).
        fasta2: path of the FASTA file whose trigram distribution is fitted
            with ``zipf_law_fit`` and plotted.
        OutFile: path the figure is saved to.

    The plot is drawn on a dedicated figure that is always closed afterwards,
    so the log axes and curves set here do not leak into later plots and
    repeated calls do not accumulate open figures.
    """
    alphasyn_seq = load_fasta_file(fasta1)
    amyload_pos_seq = load_fasta_file(fasta2)

    # NOTE(review): result_seq, result_seq2 and result_freq are computed but
    # never used for the output. They are kept so the function still fails on
    # inputs these computations reject - confirm whether they can be dropped.
    fs_aa = FeatureSet("aa patterns")
    fs_aa.add(identity)
    fs_aa.add(pattern_match, pattern='VT', padded=True)
    fs_aa.add(pattern_count, pattern='VT')

    result_seq = fs_aa(alphasyn_seq)

    fs_hp = FeatureSet("hydropathy patterns")
    fs_hp.add(Feature(get_aa2hydropathy()))
    fs_hp.add(Feature(get_aa2hydropathy()).then(pattern_match, pattern=[0.0, 2.0],
                                                metric='taxi', radius=1.0))
    result_seq2 = fs_hp(alphasyn_seq)
    result_freq = ngram_count(alphasyn_seq, n=2)

    result_fit = zipf_law_fit(amyload_pos_seq, n=3, verbose=True)

    # Empirical rank/count curve: counts in descending order, ranks from 1.
    counts = sorted(result_fit["ngram_counts"], reverse=True)
    ranks = range(1, len(counts) + 1)

    # Fitted curve: rank**-slope, normalised over all ranks and rescaled to
    # the total number of observed n-grams.
    slope = result_fit["slope"]
    harmonic_num = sum(rank ** -slope for rank in ranks)
    total_count = sum(counts)  # hoisted: was re-summed for every rank
    fitted_counts = [(rank ** -slope) / harmonic_num * total_count
                     for rank in ranks]

    fig = plt.figure()
    try:
        plt.plot(ranks, counts, 'k', label="empirical")
        plt.plot(ranks, fitted_counts, 'k--',
                 label="Zipf's law\nslope: {:.2f}".format((slope)))
        plt.xlabel('rank')
        plt.ylabel('count')
        plt.xscale('log')
        plt.yscale('log')
        plt.legend()

        fig.savefig(OutFile)
    finally:
        plt.close(fig)
Пример #3
0
def run(Fasta1, Fasta2, windows_per_frame, overlap_factor, xlabel, ylabel,
        pop1_label, pop2_label, htmlOutDir, htmlFname, Workdirpath):
    """Run a 2D local Fisher test on two FASTA sets, plot it, and call HTML_Gen.

    Args:
        Fasta1, Fasta2: paths of the two FASTA files to compare.
        windows_per_frame, overlap_factor: converted with ``int()`` and
            forwarded to ``local_fisher_2d``.
        xlabel, ylabel, pop1_label, pop2_label: forwarded unchanged to
            ``_plot_local_fisher_2d``.
        htmlOutDir: directory that is created if it does not exist yet.
        htmlFname, Workdirpath: joined as ``Workdirpath/htmlOutDir/htmlFname``
            and passed to ``HTML_Gen``.

    NOTE(review): the plot is written to ``out.png`` in the current working
    directory, and ``htmlOutDir`` is created relative to it, whereas the HTML
    path is built from ``Workdirpath``. Confirm both always refer to the same
    directory.
    """
    output_dir_missing = not os.path.exists(htmlOutDir)
    if output_dir_missing:
        os.makedirs(htmlOutDir)

    amyload_pos_seq = load_fasta_file(Fasta1)
    amyload_neg_seq = load_fasta_file(Fasta2)

    # Quantitative features: mean volume and mean hydropathy per sequence.
    volume_feature = Feature(get_aa2volume()).then(average)
    hydropathy_feature = Feature(get_aa2hydropathy()).then(average)

    feature_set = FeatureSet("volume'n'hydropathy")
    feature_set.add(volume_feature)
    feature_set.add(hydropathy_feature)

    positive_converted = feature_set(amyload_pos_seq)
    negative_converted = feature_set(amyload_neg_seq)

    # Local Fisher test on the two converted populations.
    window_count = int(windows_per_frame)
    overlap = int(overlap_factor)
    fisher_result = local_fisher_2d(positive_converted,
                                    negative_converted,
                                    windows_per_frame=window_count,
                                    overlap_factor=overlap)

    # Plot the test result into the current working directory.
    plot_options = dict(
        xlabel=xlabel,
        ylabel=ylabel,
        pop1_label=pop1_label,
        pop2_label=pop2_label,
        out_file_path=os.path.join(os.getcwd(), "out.png"),
    )
    _plot_local_fisher_2d(fisher_result, **plot_options)

    html_path = os.path.join(Workdirpath, htmlOutDir, htmlFname)
    HTML_Gen(html_path)
Пример #4
0
# Find and count matches to a pattern 'VT'
fs_aa = FeatureSet("aa patterns")
fs_aa.add(identity)
fs_aa.add(pattern_match, pattern='VT', padded=True)
fs_aa.add(pattern_count, pattern='VT')

result_seq = fs_aa(alphasyn_seq)

# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
for seq in result_seq[:3]:
    print(seq)

# ...and something much more subtle:
# Map a sequence to the hydropathy scale, and search for the pattern 0.0 - 2.0
# with the similarity radius 1.0 in the L1 norm (the 'taxi' metric).
fs_hp = FeatureSet("hydropathy patterns")
fs_hp.add(Feature(get_aa2hydropathy()))
fs_hp.add(Feature(get_aa2hydropathy()).then(pattern_match, pattern=[0.0, 2.0],
                                            metric='taxi', radius=1.0))
result_seq2 = fs_hp(alphasyn_seq)

for seq in result_seq2[:2]:
    print(seq)

# Calculate bigram frequencies in 'alphasyn_seq':
result_freq = ngram_count(alphasyn_seq, n=2)
print(result_freq)

# Fit Zipf's law for a trigram distribution in 'amyload_pos_seq':
result_fit = zipf_law_fit(amyload_pos_seq, n=3, verbose=True)

# Calculate the empirical rank-frequency plot:
Пример #5
0
from quantiprot.utils.io import load_fasta_file
from quantiprot.utils.sequence import SequenceSet
from quantiprot.utils.sequence import subset, columns
from quantiprot.utils.feature import Feature, FeatureSet

# Conversions-related imports:
from quantiprot.utils.mapping import simplify
from quantiprot.metrics.aaindex import get_aa2charge, get_aa2hydropathy, get_aa2volume, get_aa2mj
from quantiprot.metrics.aaindex import get_aaindex_file
from quantiprot.metrics.basic import identity
import numpy as np

# Convert the sequences of a FASTA file with four amino-acid mappings and
# write the values of each resulting feature to its own "<name>.tsv" file.
fapr = load_fasta_file('prot_segregating.fasta')  # load the input sequences
fs = FeatureSet("myTLRset")
fs.add(get_aa2charge())
fs.add(get_aa2volume())
fs.add(get_aa2mj())
fs.add(get_aa2hydropathy())

convfapr = fs(fapr)
# NOTE(review): presumably these are the feature names quantiprot assigns to
# the four mappings added above - confirm against the library.
metrics = ["formal_charge", "volume", "miyazawa-jernigan",
           "hydropathy"]  # feature names to export (one output file per name)

#print convfapr
for m in metrics:
    outf = open(m + ".tsv", "w")
    # The with-statement closes the file opened on the previous line.
    with outf as f:
        # Values of feature `m`, requested in transposed layout.
        h = np.matrix(columns(convfapr, feature=m, transpose=True))
        # NOTE(review): `posstr` is not defined anywhere in the visible part
        # of this script; unless it is defined elsewhere this line raises
        # NameError. Presumably a header line - confirm.
        f.write(posstr + '\n')
        # NOTE(review): the file is named .tsv but the values are written
        # comma-delimited - confirm which format is intended.
        np.savetxt(f, h, delimiter=',')
Пример #6
0
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.metrics.aaindex import get_aa2volume, get_aa2hydropathy
from quantiprot.metrics.basic import average

# Local Fisher-test related imports:
from quantiprot.analysis.fisher import local_fisher_2d, _plot_local_fisher_2d

from matplotlib import pyplot as plt

# Describe every peptide by two quantitative features: its mean volume and
# its mean hydropathy.
mean_volume = Feature(get_aa2volume()).then(average)
mean_hydropathy = Feature(get_aa2hydropathy()).then(average)

fs = FeatureSet("volume'n'hydropathy")
fs.add(mean_volume)
fs.add(mean_hydropathy)

# Load the amyloidogenic (positive) and non-amyloidogenic (negative) peptide
# sets and convert each of them with the feature set.
amyload_pos_seq = load_fasta_file("data/Amyload_positive.fasta")
amyload_pos_conv_seq = fs(amyload_pos_seq)

amyload_neg_seq = load_fasta_file("data/Amyload_negative.fasta")
amyload_neg_conv_seq = fs(amyload_neg_seq)

# Run the local Fisher test on the two converted sets:
result = local_fisher_2d(
    amyload_pos_conv_seq, amyload_neg_conv_seq,
    windows_per_frame=5, overlap_factor=5)

# Plot local Fisher:
Пример #7
0
from quantiprot.utils.io import load_fasta_file
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.metrics.aaindex import get_aa2charge, get_aa2hydropathy, get_aa2volume
from quantiprot.utils.mapping import simplify

# Quantification-related imports:
from quantiprot.metrics.basic import identity, average, sum_absolute, uniq_count
from quantiprot.utils.sequence import compact

# Load some data:
alphasyn_seq = load_fasta_file("data/Alphasyn.fasta")

# Prepare Features:
charge_sum_abs_feat = Feature(get_aa2charge()).then(sum_absolute)
hydropathy_average_feat = Feature(get_aa2hydropathy()).then(average)
volume_levels_feat = Feature(
    simplify(get_aa2volume(), name="volume levels", k=3)).then(uniq_count)

# Prepare a FeatureSet
fs = FeatureSet("simple quantification")
fs.add(hydropathy_average_feat)
fs.add(charge_sum_abs_feat)
fs.add(volume_levels_feat)

# And use it to quantify protein sequence(s):
result_seq = fs(alphasyn_seq)

# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print(result_seq)
for seq in result_seq:
    print(seq)
Пример #8
0
# Uversky plot
from quantiprot.utils.io import load_fasta_file
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.utils.sequence import compact
from quantiprot.metrics.aaindex import get_aa2charge, get_aa2hydropathy
from quantiprot.metrics.basic import average, average_absolute

from matplotlib import pyplot as plt

# Features for the plot. default=0 assigns neutral charge and hydropathy to
# the non-standard letters that occur in Disprot.
mean_hydropathy = Feature(get_aa2hydropathy(default=0)).then(average)
net_abs_charge = Feature(get_aa2charge(default=0)).then(average_absolute)

# The order in which the features are added fixes the column order.
uversky_fs = FeatureSet("uversky")
uversky_fs.add(mean_hydropathy, name="mean_hydropathy")
uversky_fs.add(net_abs_charge, name="net_abs_charge")

# Load each data set and convert it with the feature set.
amyload_seq = load_fasta_file("data/Amyload_positive.fasta")
amyload_uversky_seq = uversky_fs(amyload_seq)

disprot_seq = load_fasta_file("data/Disprot.fasta")
disprot_uversky_seq = uversky_fs(disprot_seq)

# First way to get hydrophobicity/charge pairs: select each column by its
# feature name.
amyload_data_x = amyload_uversky_seq.columns(feature="mean_hydropathy")[0]
amyload_data_y = amyload_uversky_seq.columns(feature="net_abs_charge")[0]
plt.plot(amyload_data_x, amyload_data_y, '.', label="Amyload")

# Second way to get hydrophobicity/charge pairs: compact the set and take
# all of its columns at once.
disprot_data = compact(disprot_uversky_seq).columns()