Python MultipleSeqAlignment.sort примеры использования

Язык программирования: Python

Пространство имен/Пакет: Bio.Align

Класс/Тип: MultipleSeqAlignment

Метод/Функция: sort

Примеров на hotexamples.com: 3

Python MultipleSeqAlignment.sort — найдено 3 примера. Это лучшие примеры использования Bio.Align.MultipleSeqAlignment.sort на Python, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

append(30)

MultipleSeqAlignment(30)

get_alignment_length(29)

extend(13)

add_sequence(12)

format(9)

_annotations(7)

__init__(4)

sort(2)

ref_ungapped(1)

ref(1)

pd(1)

description(1)

molecule_type(1)

__add__(1)

_version(1)

_star_info(1)

__getitem__(1)

values(1)

Пример #1

Показать файл

Файл: kralign.py Проект: karolisr/PhyloMill

def concatenate(alignments, padding_length=0, partitions=None):
    '''
    Concatenate alignments column-wise, matching rows by Seq id.

    Row order in the inputs does not matter. When one alignment has a
    Seq id the other lacks, the missing row is filled with gaps ('-').
    The rows of the result are sorted with MultipleSeqAlignment.sort().

    Neither ``alignments`` nor ``partitions`` is modified; a new
    partitions list is returned instead.

    Args:
        alignments: (tuple, list) Alignments to be concatenated.

        padding_length: Number of 'N' characters inserted between
            consecutive alignments.

        partitions: Optional list of (start, end) tuples describing the
            alignments already merged; used by the recursive calls.

    Returns:
        A tuple (alignment, partitions). The range recorded for each
        appended alignment starts right after the previous alignment,
        so it includes the padding columns that precede it.
        When ``alignments`` holds exactly one item, that item is
        returned as-is (not wrapped in a tuple).

    Raises:
        ValueError: ``alignments`` is not a list/tuple, or the items
            being merged are not MultipleSeqAlignment instances (this
            also covers an empty ``alignments``).
    '''

    # Cheap argument checks come first so they do not depend on Biopython.
    if not isinstance(alignments, (list, tuple)):
        raise ValueError('Argument must be a list or a tuple.')
    if len(alignments) == 1:
        return alignments[0]

    # NOTE(review): Bio.Alphabet is absent from newer Biopython releases;
    # confirm the Biopython version this project pins.
    from Bio import Alphabet
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.Align import MultipleSeqAlignment

    aln1 = None
    aln2 = None
    if len(alignments) > 2:
        # Merge everything but the last alignment first. Slicing (rather
        # than pop()) leaves the caller's sequence untouched.
        aln1, partitions = concatenate(alignments=alignments[:-1],
                                       padding_length=padding_length,
                                       partitions=partitions)
        aln2 = alignments[-1]
    elif len(alignments) == 2:
        aln1, aln2 = alignments

    if (not isinstance(aln1, MultipleSeqAlignment) or
            not isinstance(aln2, MultipleSeqAlignment)):
        raise ValueError(
            'Argument must inherit from Bio.Align.MultipleSeqAlignment.')

    alphabet = Alphabet._consensus_alphabet([aln1._alphabet, aln2._alphabet])

    # Index the rows of both alignments by Seq id.
    aln1_dict = dict((record.id, record) for record in aln1)
    aln2_dict = dict((record.id, record) for record in aln2)

    aln1_length = aln1.get_alignment_length()
    aln2_length = aln2.get_alignment_length()
    aln1_gaps = SeqRecord(Seq('-' * aln1_length, alphabet))
    aln2_gaps = SeqRecord(Seq('-' * aln2_length, alphabet))
    padding = SeqRecord(Seq('N' * padding_length, alphabet))

    # Work on a copy so a list supplied by the caller is never mutated.
    partitions = list(partitions) if partitions else [(1, aln1_length)]
    partitions.append(
        (1 + aln1_length, padding_length + aln1_length + aln2_length))

    def _merge(left, right, seq_id):
        # Join two rows around the padding and reset the metadata.
        merged = left + padding + right
        merged.id = seq_id
        merged.name = ''
        merged.description = ''
        return merged

    result_seq_list = []
    for seq_id, record1 in aln1_dict.items():
        # Consume the matching aln2 row so only unmatched rows remain.
        record2 = aln2_dict.pop(seq_id, None)
        if record2 is None:
            record2 = aln2_gaps
        result_seq_list.append(_merge(record1, record2, record1.id))

    # Rows present only in aln2 get gaps in place of the aln1 columns.
    for record2 in aln2_dict.values():
        result_seq_list.append(_merge(aln1_gaps, record2, record2.id))

    result_alignment = MultipleSeqAlignment(result_seq_list, alphabet)
    result_alignment.sort()
    return result_alignment, partitions

Пример #2

Показать файл

    dictionary[key] = valuelist[0:10]
from pprint import pprint

# Dump the dictionary to disk; the context manager closes the file even
# if pprint raises.
with open("global.dict", "w") as fielddict_file:
    pprint(dictionary, fielddict_file)

reference = []

# For every dictionary entry, read each listed FASTA alignment, append it
# to a growing five-species alignment and write the result to '<key>.ref'.
for i, j in dictionary.iteritems():
    n = 0  # number of alignments merged for this key
    combined_seq = MultipleSeqAlignment([
        SeqRecord(Seq('', generic_dna), id="hg19"),
        SeqRecord(Seq('', generic_dna), id="panTro4"),
        SeqRecord(Seq('', generic_dna), id="gorGor3"),
        SeqRecord(Seq('', generic_dna), id="rheMac3"),
        SeqRecord(Seq('', generic_dna), id="ponAbe2")
    ])
    # Both sides are sorted before every addition; presumably so that rows
    # are paired in the same id order -- confirm against the
    # MultipleSeqAlignment addition semantics.
    combined_seq.sort()
    for ref in j:
        n += 1
        seq_records = AlignIO.read(ref, 'fasta')
        seq_records.description = ""
        seq_records.sort()
        combined_seq = combined_seq + seq_records
        combined_seq.description = ""
    with open('%s.ref' % i, 'w') as write_file:
        AlignIO.write(combined_seq, write_file, 'fasta')
    # Append '<key>\t<count>' to the index; the handle is closed on every
    # iteration instead of being left open.
    with open('reference.list', 'a') as referencelist:
        referencelist.write('%s\t%i\n' % (i, n))

Пример #3

Показать файл

Файл: virus_clean.py Проект: 5l1v3r1/treetool

class virus_clean(object):
	"""Cleaning steps for a collection of virus sequence records.

	The methods read attributes that ``__init__`` does not set here
	(``viruses``, ``sequence_lookup``, ``outgroup``, ``date_format``,
	``verbose``); presumably a cooperating class provides them --
	confirm against the code that instantiates this class.
	"""
	def __init__(self, n_iqd=5, **kwargs):
		'''
		parameters
		n_iqd	-- number of interquartile distances accepted in molecular clock filter
		'''
		self.n_iqd = n_iqd

	def remove_insertions(self):
		'''
		remove all columns from the alignment in which the outgroup is gapped
		'''
		# Boolean mask: True where the outgroup has a non-gap character.
		outgroup_ok = np.array(self.sequence_lookup[self.outgroup['strain']]) != '-'
		for seq in self.viruses:
			seq.seq = Seq("".join(np.array(seq.seq)[outgroup_ok]).upper())

	def clean_gaps(self):
		'''
		remove viruses with gaps -- not part of the standard pipeline
		'''
		# Keep only gap-free sequences (the previous predicate was inverted
		# and kept only the gapped ones).
		self.viruses = [v for v in self.viruses if '-' not in v.seq]

	def clean_ambiguous(self):
		'''
		substitute all ambiguous characters with '-',
		ancestral inference will interpret this as missing data
		'''
		for v in self.viruses:
			# Every upper-case letter other than A, C, G and T becomes a gap.
			v.seq = Seq(re.sub(r'[BDEFHIJKLMNOPQRSUVWXYZ]', '-', str(v.seq)))

	def unique_date(self):
		'''
		add a unique numerical date to each leaf. uniqueness is achieved adding a small number
		'''
		from date_util import numerical_date
		og = self.sequence_lookup[self.outgroup['strain']]
		if hasattr(og, 'date'):
			try:
				og.num_date = numerical_date(og.date)
			except Exception:
				# Best effort: an unparsable date is flagged, not fatal.
				print("cannot parse date")
				og.num_date = "undefined"
		for ii, v in enumerate(self.viruses):
			if hasattr(v, 'date'):
				try:
					# The per-index offset of 1e-7 makes every date distinct.
					v.num_date = numerical_date(v.date, self.date_format['fields']) + 1e-7*(ii+1)
				except Exception:
					print("cannot parse date")
					v.num_date = "undefined"

	def times_from_outgroup(self):
		"""Return an array of num_date differences to the outgroup, for viruses with a truthy strain."""
		outgroup_date = self.sequence_lookup[self.outgroup['strain']].num_date
		return np.array([x.num_date-outgroup_date for x in self.viruses if x.strain])

	def distance_from_outgroup(self):
		"""Return an array of hamming distances to the outgroup sequence, for viruses with a truthy strain."""
		from seq_util import hamming_distance
		outgroup_seq = self.sequence_lookup[self.outgroup['strain']].seq
		return np.array([hamming_distance(x.seq, outgroup_seq) for x in self.viruses if x.strain])

	def clean_distances(self):
		"""Remove viruses that don't follow a loose clock """
		times = self.times_from_outgroup()
		distances = self.distance_from_outgroup()
		slope, intercept, r_value, p_value, std_err = stats.linregress(times, distances)
		residuals = slope*times + intercept - distances
		# Interquartile distance of the regression residuals.
		r_iqd = stats.scoreatpercentile(residuals, 75) - stats.scoreatpercentile(residuals, 25)
		if self.verbose:
			print("\tslope: " + str(slope))
			print("\tr: " + str(r_value))
			print("\tresiduals iqd: " + str(r_iqd))
		new_viruses = []
		# NOTE(review): residuals only cover viruses with a truthy strain,
		# so this pairing assumes every virus has one -- confirm.
		for (v, r) in zip(self.viruses, residuals):
			# keep viruses within n_iqd interquartile distances of the fit;
			# the outgroup is always kept
			if np.abs(r) < self.n_iqd * r_iqd or v.id == self.outgroup["strain"]:
				new_viruses.append(v)
			else:
				if self.verbose > 1:
					print("\t\tresidual: %s \nremoved  %s" % (r, v.strain))
		self.viruses = MultipleSeqAlignment(new_viruses)

	def clean_generic(self):
		"""Run the cleaning steps in order, then sort the viruses by num_date."""
		print("Number of viruses before cleaning: %s" % len(self.viruses))
		self.unique_date()
		self.remove_insertions()
		self.clean_ambiguous()
		self.clean_distances()
		self.viruses.sort(key=lambda x: x.num_date)
		print("Number of viruses after outlier filtering: %s" % len(self.viruses))