def calculate_codon_properties(cds_dict, args):
    """Compute RSCU, and optionally CAI, for each protein-coding gene record.

    The records are first passed through ``filter_ambiguous``. Every remaining
    key that contains one of the gene names in ``pcg`` is then analysed and the
    result is handed to ``write_codon_properties``.

    Args:
        cds_dict: Mapping from record keys to nucleotide sequences. Keys are
            indexable (element 4 is compared against ``args.CAI``) and are
            tested for membership of a gene name; presumably tuples -- confirm
            against the caller.
        args: Options object. ``args.CAI`` and ``args.GeneCode`` are read
            here, and the whole object is forwarded to
            ``write_codon_properties``.

    Returns:
        None. Results are emitted through ``write_codon_properties``.
    """
    print("\n\nPerforming codon analyses now\n")
    pcg = ['cox1','cox3','atp6','atp8','nad4l','nad4','nad6','nad1','nad5','cob','nad2','nad3','cox2']
    cds_dict = filter_ambiguous(cds_dict, pcg)
    # Snapshot the keys once; the original rebuilt this list inside the
    # innermost loop just to index back to the key it was already holding.
    keys = list(cds_dict.keys())
    total = len(keys)
    for position, tupl in enumerate(keys, start=1):
        print(' ' + str(round(position/total*100, 2)) + '%', end = "\r")
        for gene in pcg:
            if gene in tupl:
                cai_value = "NA"
                # Drop the last three characters of the sequence
                # (presumably the stop codon -- confirm).
                sequence = cds_dict[tupl][:-3]
                rscu_list = [sequence]
                # NOTE(review): the flag is compared to True and then reused
                # as the value that key element 4 must equal; kept as-is.
                if args.CAI == True:  # noqa: E712
                    # Build the reference set fresh for every target gene.
                    # Previously a single list was shared across iterations,
                    # so it accumulated duplicates and ended up containing
                    # the very gene that `other_tpl != tupl` tries to exclude.
                    reference = [
                        cds_dict[other_tpl]
                        for other_tpl in keys
                        if other_tpl != tupl and other_tpl[4] == args.CAI
                    ]
                    cai_value = CAI(sequence, reference= reference, genetic_code= int(args.GeneCode))
                rscu_values = RSCU(rscu_list, genetic_code= int(args.GeneCode))
                write_codon_properties(file = tupl, cai = cai_value, rscu = rscu_values, args = args)
示例#2
0
def test_cai():
    """CAI gives the same answer for every way of supplying reference data."""
    # The three mutually exclusive reference arguments must agree.
    from_reference = CAI("AAC", reference=["AAC"])
    from_rscus = CAI("AAC", RSCUs=RSCU(["AAC"]))
    from_weights = CAI("AAC", weights=relative_adaptiveness(sequences=["AAC"]))
    assert from_reference == from_rscus == from_weights == 1.0

    # A codon absent from the reference scores 0.5, whatever the repeat count.
    for query in ("AAT", "AATAAT", "AAT" * 100):
        assert CAI(query, reference=["AAC"]) == 0.5
示例#3
0
def test_bad_args():
    """CAI raises TypeError when given no, or conflicting, reference data."""
    # Each call is wrapped in a lambda so its arguments are still evaluated
    # inside the pytest.raises context.
    invalid_calls = (
        lambda: CAI("AAC"),  # no reference data
        lambda: CAI("AAC", reference=["AAC"], RSCUs=RSCU(["AAC"])),
    )
    for invalid_call in invalid_calls:
        with pytest.raises(TypeError):
            invalid_call()
示例#4
0
def test_stop_codon():
    """Stop codons carry no RSCU, so they behave like an empty sequence."""
    taa_table = RSCU(["TAA"])
    tag_table = RSCU(["TAG"])
    blank_table = RSCU(["   "])
    assert taa_table == tag_table == blank_table
示例#5
0
def test_multiple_seqs():
    """Several sequences give the same RSCU as their concatenation."""
    sequence_groups = (
        ["AAC", "ATC"],
        ["AAC", "ATC", "AACGATACGGCACGT"],
    )
    for group in sequence_groups:
        joined = "".join(group)
        assert RSCU(group) == RSCU([joined])
示例#6
0
def test_seq():
    """RSCU accepts Bio.Seq objects and treats them like plain strings."""
    from Bio.Seq import Seq

    for raw in ("AGC", "AACGATACGGCACGT"):
        assert RSCU([Seq(raw)]) == RSCU([raw])
示例#7
0
def test_str_arg():
    """Passing a bare string instead of a list of sequences is an error."""
    # Callable form of pytest.raises: invokes RSCU("AAA") and expects ValueError.
    pytest.raises(ValueError, RSCU, "AAA")
示例#8
0
def test_sum():
    """The RSCU values of a table sum to the number of codons in that table."""
    for sequence in ("AAC", "AACGATACGGCACGT"):
        table = RSCU([sequence])
        # Compare each table against its own size. The original measured the
        # second sequence against the length of the "AAC" table, a copy-paste
        # slip that would hide any mismatch in the second table's key set.
        assert abs(sum(table.values()) - len(table)) < 0.0001
示例#9
0
def test_rscu():
    """Check RSCU(["AAC"]) against the full expected codon table."""
    bases = "ACGT"
    excluded = ("TAA", "TAG", "TGA")  # the three triplets absent from the table
    # Every remaining triplet defaults to 1.0 ...
    expected = {
        first + second + third: 1.0
        for first in bases
        for second in bases
        for third in bases
        if first + second + third not in excluded
    }
    # ... except the observed codon and its synonym (presumably the unseen
    # synonym is given a 0.5 pseudocount -- confirm against the library).
    expected["AAC"] = 1 / (0.5 * (1 + 0.5))
    expected["AAT"] = 0.5 / (0.5 * (1 + 0.5))
    assert RSCU(["AAC"]) == expected
示例#10
0
	# Fragment of a larger function whose header is outside this view.
	# Accumulators: sequence strings, number of records, total sequence length.
	seq_list = []
	counter = 0
	n_count = 0
	# Read the records at input_path as FASTA.
	seq_object = SeqIO.parse(input_path, "fasta")
	for seqs in seq_object:
				# NOTE(review): seq_id is not used anywhere in the visible fragment.
				seq_id = seqs.id
				seq = str(seqs.seq)
				seq_list.append(seq)
				counter += 1
				nn = len(seq)
				n_count += nn

	print("\n" + str(counter) + " genes imported containing " + str(n_count) + " nucleotides")
	print("\nCalculating RSCU for imported genes\n")
	# NOTE(review): the bare except only prints a message. If RSCU() raises,
	# RSCU_list is never assigned here and the loop below fails with NameError
	# unless it was bound earlier, outside this view.
	try:
		RSCU_list = RSCU(seq_list)
	except:
		print("\nEXCEPTION: RSCU could not be caluclated for imported genes")
	print("\nParsing RSCU for codon optimization")
	#To do: Create RSCU parser function
	# Overwrite each inner entry of codon_table_11 whose key also appears in
	# RSCU_list with the computed value. Only existing keys are reassigned, so
	# the dict sizes do not change during iteration.
	# (presumably codon_table_11 maps amino acid -> {codon: value} -- confirm)
	for k, v in codon_table_11.items():
		for k2, v2 in v.items():
			if k2 in RSCU_list:
				codon_table_11[k][k2] = RSCU_list[k2]


	print("\nOptimizing codons for input gene list")
	#Read gene fasta sequence and initiate optimizer


	problem = DnaOptimizationProblem(
def test_bad_args():
    """relative_adaptiveness raises TypeError on missing or conflicting input."""
    # Each call is wrapped in a lambda so its arguments are still evaluated
    # inside the pytest.raises context.
    invalid_calls = (
        lambda: relative_adaptiveness(),
        lambda: relative_adaptiveness(sequences=["AAC"], RSCUs=RSCU(["AAC"])),
    )
    for invalid_call in invalid_calls:
        with pytest.raises(TypeError):
            invalid_call()
def test_arg_equivalence():
    """Reference sequences and a precomputed RSCU dict yield the same weights."""
    weights_from_sequences = relative_adaptiveness(sequences=["AAC"])
    weights_from_rscus = relative_adaptiveness(RSCUs=RSCU(["AAC"]))
    assert weights_from_sequences == weights_from_rscus
示例#13
0
def test_rscu():
    """Check RSCU(["AAC"]) against the full expected codon table."""
    absent = {'TAA', 'TAG', 'TGA'}  # triplets that do not appear in the table
    triplets = [x + y + z for x in 'ACGT' for y in 'ACGT' for z in 'ACGT']
    # Start with 1.0 for every listed triplet, then patch the two exceptions.
    expected = dict.fromkeys(
        (triplet for triplet in triplets if triplet not in absent), 1.0)
    expected.update({
        'AAC': 1 / (0.5 * (1 + 0.5)),
        'AAT': 0.5 / (0.5 * (1 + 0.5)),
    })
    assert RSCU(["AAC"]) == expected
示例#14
0
        return ss.pearsonr(x, y)[0]
    elif "spearman" in corelationFunction:
        return ss.spearmanr(x, y, nan_policy="omit")[0]
    elif "kendall" in corelationFunction:
        return ss.kendalltau(x, y, nan_policy="omit")[0]


# Load every sequence from the target FASTA file, keyed by raw identifier.
geneDict = FSB.findSequenceByID(targetFastaFile, idType="raw")

print("Adult Set:")
# RSCU of the whole set, computed from all sequences together.
seqList = list(geneDict.values())
adultRSCU = RSCU(seqList)

# Correlate each gene's own RSCU profile with the whole-set profile.
correlationList = []
for gene, seq in geneDict.items():
    rscu = RSCU([seq])
    gene_profile = RSCU_To_List(rscu)
    correlation = testCorelation(
        gene_profile, RSCU_To_List(adultRSCU), "spearman")
    correlationList.append(correlation)

# Histogram of the per-gene correlation coefficients.
plt.figure()
plt.xlabel("Correlation")
plt.xlim(0, 1)
plt.hist(correlationList, bins=20)
plt.show()
from Bio import SeqIO
from Bio.Seq import Seq
from CAI import RSCU
from matplotlib import pyplot as plt
import numpy as np

# Compute the RSCU table of the first CDS feature found in each record of
# "KRas.gb", then split that table into parallel key/value lists for plotting.
for seq_record in SeqIO.parse("KRas.gb", "genbank"):
    for f in seq_record.features:
        rscu_list = []
        if f.type == 'CDS':
            feature_seq = f.location.extract(seq_record).seq
            coding_result = Seq(str(feature_seq))
            # Wrap the sequence in a new list; RSCU only accepts a list as input.
            rscu_list.append(coding_result)
            #print(rscu_list)
            codon_pre = RSCU(rscu_list)
            #print(codon_pre)
        # Stop scanning features as soon as rscu_list is non-empty.
        # NOTE(review): this break leaves only the inner feature loop; the
        # outer record loop continues, so a later record can overwrite codon_pre.
        if len(rscu_list):
            break
# Parallel lists: codon keys and their RSCU values, in dict iteration order.
code = []
value = []

# NOTE(review): codon_pre is only bound if a CDS feature was found above;
# otherwise this loop raises NameError.
for k in codon_pre:
    code.append(k)
    values = codon_pre[k]
    value.append(values)

# Select the non-interactive Agg backend and create a wide figure.
plt.switch_backend('Agg')
plt.figure(figsize=(20, 6), dpi=80)
width = 0.8