def write_as_fasta(self, fh, n=None):
    """-----------------------------------------------------------------------------------------
    Write ORFs to an open file in fasta format. If n is defined, write only the specified ORF
    in the list.

    Each record is produced by Fasta.format(linelen=60) and is followed by a newline. The
    documentation line of each record carries len, strand, frame, begin and end of the ORF.

    :param fh: open filehandle for writing
    :param n: integer, index in self.orf of the ORF to write; write all if not specified.
              Negative indices count from the end of the list, as in normal Python
              indexing. An index outside the list writes nothing.
    :return: integer, number of ORFs written
    -----------------------------------------------------------------------------------------"""
    # Decide which ORFs to write first, so the record-writing code exists only once.
    if n is None:
        selected = self.orf
    elif -len(self.orf) <= n < len(self.orf):
        selected = [self.orf[n]]
    else:
        # out of range in either direction: nothing to write, report zero
        selected = []

    # a single Fasta object is reused for every record
    fasta = Fasta()
    nwritten = 0
    for orf in selected:
        fasta.id = orf['id']
        fasta.doc = 'len={} strand={} frame={} begin={} end={}'.format(
            orf['length'], orf['direction'], orf['frame'], orf['begin'], orf['end'])
        fasta.seq = orf['sequence']
        fh.write(fasta.format(linelen=60))
        fh.write('\n')
        nwritten += 1

    return nwritten
# Exercise Windowmatch on a sequence read from the file named by the first
# command-line argument, then hand the result to a Plotter.
# NOTE(review): this looks like the body of a test/demo section whose header is
# outside this excerpt -- confirm the enclosing indentation before merging.
from plotter import Plotter

# Create the matcher and print its type, its alphabet and its formatted state.
match = Windowmatch()
print('done {}'.format(type(match)))
print(match.alphabet)
# Disabled: load a scoring table from an NCBI-format matrix file.
# match.readNCBI('table/NUC4.4.matrix')
print(match.format())

# First sequence: read from the file given as sys.argv[1].
fasta1 = Fasta(filename=sys.argv[1])
fasta1.read()

# Second sequence: the first 50 positions of the first sequence. This slice is
# taken before fasta1 is itself cut down to its first 200 positions.
fasta2 = Fasta()
fasta2.id = 'seq2'
fasta2.doc = ' bases 1:50'
fasta2.seq = fasta1.seq[:50]
fasta1.seq = fasta1.seq[:200]

# Attach both sequences to the matcher and print the two values seqToInt() returns.
match.s1 = fasta1
match.s2 = fasta2
l1, l2 = match.seqToInt()
print(l1, l2)

# Score with a window of 10 and a threshold of 5, then report the result.
match.window = 10
match.threshold = 5
nmatch = match.windowScore()
print('window: {} threshold: {} nmatch: {}'. \
      format(match.window, match.threshold, nmatch))

# Give the scored match to a plotter; any drawing happens beyond this excerpt.
plot = Plotter()
plot.match = match
diagonal[d] = filtered return nmatch # -------------------------------------------------------------------------------------------------- # Testing # -------------------------------------------------------------------------------------------------- if __name__ == '__main__': print('\ntest 0: identity matching') print('\texpect 7 matches\n') fasta = Fasta() fasta.id = 'test0' fasta.doc = '5 letter DNA test' fasta.seq = 'ACAGT' print('{}\n'.format(fasta.format())) match = Match() match.s1 = fasta match.s2 = fasta nmatch = match.identityPos() print('matches: {}'.format(nmatch)) print('\ntest 1: identity matching, unequal length sequences') print('\texpect 11 matches\n') match = Match() fasta1 = Fasta() fasta1.id = 'test1.1' fasta1.doc = '5 letter DNA test'
nfeature += 1 elif info['Parent'] in flist: for k in info: if k not in flist[info['Parent']]: flist[info['Parent']][k] = info[k] else: # flist[info['ID']] = info sys.stderr.write('unknown feature {}\n'.format(info['feature'])) # write out sequences for gene in flist: thisgene = flist[gene] f = Fasta() f.id = thisgene['ID'] f.doc = '' for k in save: if k in thisgene: f.doc += ' {}:{}'.format(k, thisgene[k]) f.seq = seq[thisgene['seqname']][thisgene['begin'] - 1:thisgene['end']] if (thisgene['end'] - thisgene['begin'] > 100000): # coordinates cross origin f.seq = seq[thisgene['seqname']][thisgene['end'] - 1:] + seq[ thisgene['seqname']][:thisgene['begin']] if thisgene['strand'] == '-': f.seq = complement(f.seq) sys.stdout.write(f.format(linelen=100)) exit(0)
"phams":["56154"], "Start":15822, "Stop":16230, "Length":408, "Name":"24", "translation":"MTNVFTLDAMREETRKKYQPVKIGLSEDVTVELKPLLKLGKKAREAVADAVKEIEALPDEIDEDDEDSDELMDEVAEKICESIAKVFKLIATSPRKLLAELDTEEEPQIRAELYGAVLRTWMRET QLGEAAPSPN", "Orientation":"F", "Notes":"b'tail assembly chaperone'"} ... Michael Gribskov 10 April 2021 =================================================================================================""" import sys import json from sequence.fasta import Fasta # -------------------------------------------------------------------------------------------------- # main program # -------------------------------------------------------------------------------------------------- if __name__ == '__main__': fp = open(sys.argv[1], 'r') phage = json.load(fp) for gene in phage['results']: f = Fasta() f.id = gene['GeneID'] f.seq = gene['translation'] f.doc = gene['Notes'][2:-1] print(f.format(linelen=100)) exit(0)
base = base.replace('.seq', '') sys.stdout.write('\n\tExpanded file: {}\n\tbasename: {}\n'.format( infilename, base)) outfilename = base + '.fasta' outfile = None try: outfile = open(outfilename, 'w') except: sys.stderr.write( 'Unable to open output file ({})\n'.format(outfilename)) exit(2) # process all sequences in the file n = 0 for seq in infile: fasta = Fasta() fasta.id = base + '_{}'.format(n) fasta.seq = seq.rstrip().upper() fasta.doc = 'length={}'.format(fasta.length()) outfile.write(fasta.format(linelen=100)) n += 1 infile.close() outfile.close() sys.stdout.write('\t{} sequences written to {}\n'.format( n, outfilename)) # end of loop over files exit(0)