def get_other_consensi_all(pname, fragment, VERBOSE=0):
    '''Collect the consensus sequences that do not belong to one patient.

    Parameters:
       pname: patient name; records whose name starts with it (up to the
              first '_') are left out
       fragment: forwarded to get_consensi_alignment_filename, together
                 with 'all', to locate the FASTA alignment to read
       VERBOSE: accepted for interface compatibility, not used here

    Returns:
       list of the remaining sequence records, in the order they are read,
       each with the '-' gap characters stripped from its sequence
    '''
    from Bio import SeqIO
    from hivwholeseq.patients.filenames import get_consensi_alignment_filename

    def belongs_to_patient(record):
        # NOTE: the record name is '<patient name>_<rest>'
        return record.name.split('_')[0] == pname

    ali_filename = get_consensi_alignment_filename('all', fragment)

    others = []
    for record in SeqIO.parse(ali_filename, 'fasta'):
        if belongs_to_patient(record):
            continue

        # The file is an alignment: drop the gaps to get the bare sequence
        record.seq = record.seq.ungap('-')
        others.append(record)

    return others
def get_other_consensi_all(pname, fragment, VERBOSE=0):
    '''Return the ungapped consensi of all patients but the given one.

    The records are read from the FASTA alignment whose filename is given by
    get_consensi_alignment_filename('all', fragment).

    Parameters:
       pname: name of the patient to exclude
       fragment: fragment identifier used to look up the alignment file
       VERBOSE: unused in this function

    Returns:
       list of sequence records from other patients, gaps ('-') removed
    '''
    from Bio import SeqIO
    from hivwholeseq.patients.filenames import get_consensi_alignment_filename

    source = get_consensi_alignment_filename('all', fragment)
    kept = []
    for rec in SeqIO.parse(source, 'fasta'):
        # NOTE: the patient name is whatever precedes the first '_' in the
        # record name
        owner = rec.name.partition('_')[0]
        if owner != pname:
            rec.seq = rec.seq.ungap('-')
            kept.append(rec)
    return kept
def get_consensi_alignment_filename(self, region, format='fasta'):
    '''Return the filename of the consensi alignment for this object.

    Thin wrapper around
    hivwholeseq.patients.filenames.get_consensi_alignment_filename, called
    with self.name as its first argument.

    Parameters:
       region: region identifier, forwarded unchanged
       format: file format, forwarded as keyword argument (default 'fasta')

    Returns:
       whatever the module-level function returns for these arguments
    '''
    # Imported under an alias so it is not confused with this method's name
    from hivwholeseq.patients.filenames import \
        get_consensi_alignment_filename as _module_level_getter

    filename = _module_level_getter(self.name, region, format=format)
    return filename
samples = load_samples_sequenced() if seq_runs is not None: samples = samples.loc[samples['seq run'].isin(seq_runs)] if adaIDs is not None: samples = samples.loc[samples.adapter.isin(adaIDs)] if use_pats: samples = samples.loc[samples['patient sample'] != 'nan'] else: samples = samples.loc[samplenames] if fragments is None: fragments = ['F'+str(i+1) for i in xrange(6)] alis = {fr: AlignIO.read(get_consensi_alignment_filename('all', fr), 'fasta') for fr in fragments} for samplename, sample in samples.iterrows(): sample = SampleSeq(sample) data_folder = sample.seqrun_folder adaID = sample.adapter pname = sample.patientname for fragment in fragments: if VERBOSE >= 1: print sample['seq run'], adaID, fragment, samplename, # Read the summary filename of the filter_mapped, and find out whether # there are many distant reads (a few are normal) fn = get_filter_mapped_summary_filename(data_folder, adaID, fragment)
'subtype', 'confidence'), ) write_json(tree_json, fn, indent=1) if use_joint: if VERBOSE >= 2: print 'Align all patients', ali_all = align_muscle(*seqs_all, sort=True) if VERBOSE >= 2: print 'OK' if use_save: if VERBOSE >= 2: print 'Save all patients', reg_tmp = '_'.join(pcodes) + '_' + region fn_out = get_consensi_alignment_filename('all', reg_tmp) mkdirs(os.path.dirname(fn_out)) AlignIO.write(ali, fn_out, 'fasta') if VERBOSE >= 2: print 'OK' if VERBOSE >= 2: print 'Build local tree' tree = build_tree_fasttree(ali_all, VERBOSE=VERBOSE) if VERBOSE >= 2: print 'Infer ancestral sequences' a = ancestral_sequences(tree, ali_all, alphabet='ACGT-N', copy_tree=False,
) write_json(tree_json, fn, indent=1) if use_joint: if VERBOSE >= 2: print 'Align all patients', ali_all = align_muscle(*seqs_all, sort=True) if VERBOSE >= 2: print 'OK' if use_save: if VERBOSE >= 2: print 'Save all patients', reg_tmp = '_'.join(pcodes)+'_'+region fn_out = get_consensi_alignment_filename('all', reg_tmp) mkdirs(os.path.dirname(fn_out)) AlignIO.write(ali, fn_out, 'fasta') if VERBOSE >= 2: print 'OK' if VERBOSE >= 2: print 'Build local tree' tree = build_tree_fasttree(ali_all, VERBOSE=VERBOSE) if VERBOSE >= 2: print 'Infer ancestral sequences' a = ancestral_sequences(tree, ali_all, alphabet='ACGT-N', copy_tree=False, attrname='sequence', seqtype='str') a.calc_ancestral_sequences() a.cleanup_tree()
def get_consensi_alignment_filename(self, region, format='fasta'):
    '''Filename of the multiple sequence alignment of consensi.

    Delegates to the function of the same name in
    hivwholeseq.patients.filenames, passing self.name, the region and the
    format.

    Parameters:
       region: region identifier, passed through as is
       format: file format keyword, passed through as is (default 'fasta')

    Returns:
       the value returned by the delegated function
    '''
    from hivwholeseq.patients.filenames import \
        get_consensi_alignment_filename as filename_for

    owner_name = self.name
    return filename_for(owner_name, region, format=format)