def excise_position(db_seq, db_lng, excise_beg, excise_end):
    '''
    Cut a window out of db_seq after limiting it with max2 / min2.

    The requested start is combined with 1 through max2 and the requested
    end is combined with db_lng through min2 (presumably two-argument
    max/min helpers defined elsewhere in this file - confirm).  The
    resulting inclusive window is handed to substr as
    (offset = start - 1, length = end - start + 1).

    Returns whatever substr returns for that offset and length.
    '''
    window_first = max2(1, excise_beg)
    window_last = min2(db_lng, excise_end)

    # The offset is shifted down by one before calling substr; this looks
    # like a 1-based -> 0-based conversion (verify against substr).
    return substr(db_seq, window_first - 1, window_last - window_first + 1)
def remove_adapter(_id, seq, prefix):
    '''
    Clip an adapter prefix off one read and print the result as FASTA.

    The read is first passed through tr with the character sets
    '[acgtun.]' -> '[ACGTTNN]'.  Clipping is then attempted in this order:

      1. word characters followed by the whole prefix anywhere in the
         read: keep the captured word characters;
      2. the first six characters of the read (via substr) equal the
         prefix: the clipped sequence is set to the prefix itself;
      3. otherwise drop one trailing character from the prefix at a time
         and look for word characters followed by the shortened prefix
         at the very end of the read, stopping at the first hit.

    If nothing non-empty was produced, the translated read is used as is.
    The record is written to stdout as ">id", newline, sequence; nothing
    is returned.

    NOTE(review): prefix is inserted into the regular expressions without
    escaping, so it is interpreted as a pattern.
    '''
    seq = tr(seq, '[acgtun.]', '[ACGTTNN]')
    clipped = None

    full_hit = re.search(r'(\w+)' + prefix, seq)
    if full_hit:
        clipped = full_hit.group(1)
    elif substr(seq, 0, 6) == prefix:
        clipped = prefix
    else:
        # Shorten first, then test: the untrimmed prefix is never tried
        # with the end-of-read anchor, and the last attempt uses an empty
        # prefix.
        shortened = prefix
        while shortened:
            shortened = shortened[:-1]
            tail_hit = re.search(r'(\w+){}$'.format(shortened), seq)
            if tail_hit:
                clipped = tail_hit.group(1)
                break

    if not clipped:
        clipped = seq

    print('>{}\n{}'.format(_id, clipped))
def make_dir_tmp(pref, MAP):
    '''
    Create a scratch directory with a timestamped, randomised name.

    The current local time, formatted as day_month_year_hour_minute_second,
    is written to MAP (any object with a write method) on a "timestamp:"
    line.  The directory name is built as
    dir_mapper<pref>_<random part>_<timestamp>, the directory is created
    with os.mkdir in the current working directory, and its name is
    returned.
    '''
    stamp = datetime.datetime.now().strftime('%d_%m_%y_%H_%M_%S')
    MAP.write('\ntimestamp:\t{}\n\n'.format(stamp))

    # Slice of the textual form of a random float starting at offset 2,
    # which skips the leading "0." in the usual non-exponent notation.
    random_part = substr(str(random.random()), 2, 10)

    dir_name = 'dir_mapper{}_{}_{}'.format(pref, random_part, stamp)
    os.mkdir(dir_name)
    return dir_name
def excise_struct(struct, beg, end, strand):
    '''
    Excise the sub-structure spanning positions beg..end (inclusive).

    beg and end are treated as 1-indexed and converted to the 0-indexed
    offset expected by substr.  strand is accepted but not used here.

    Returns the excised sub-structure, or 0 when beg lies beyond the end
    of struct.  When beg is greater than end, a message naming the
    module-level db_old is sent to print_stderr and the process exits
    with status 0.
    '''
    struct_len = len(struct)

    # beg == end is legal: exactly one position is excised.
    if beg > end:
        print_stderr(
            'begin can not be greater than end for {}\n'.format(db_old))
        sys.exit(0)

    # Rarely (roughly once per two thousand combinations), permuted
    # signature/structure pairs ask for a window outside the structure;
    # those are answered with 0 instead of an error.
    if beg > struct_len:
        return 0

    # The blast parsed format is 1-indexed, substr is 0-indexed.
    return substr(struct, beg - 1, end - beg + 1)
def excise_seq(seq, beg, end, strand):
    '''
    Excise a sub-sequence from the potential precursor.

    beg and end are 1-indexed, inclusive positions; they are converted to
    the 0-indexed offset expected by substr.

    Returns the excised sub-sequence, passed through revcom when strand
    is "-".  Returns 0 when beg lies beyond the end of seq.  When beg is
    greater than end, a message naming the module-level db_old is sent to
    print_stderr and the process exits with status 0.
    '''
    # begin can be equal to end if only one nucleotide is excised
    if beg > end:
        # Message wording aligned with the identical check in
        # excise_struct (the word "be" was missing here).
        print_stderr(
            'begin can not be greater than end for {}\n'.format(db_old))
        sys.exit(0)

    # Rarely, permuted combinations of signature and structure cause out
    # of bound excision requests (about once every two thousand
    # combinations); those are answered with 0 instead of an error.
    if beg > len(seq):
        return 0

    # the blast parsed format is 1-indexed, substr is 0-indexed
    sub_seq = substr(seq, beg - 1, end - beg + 1)

    # if on the minus strand, the reverse complement should be returned
    if strand == "-":
        sub_seq = revcom(sub_seq)

    return sub_seq
if mm: remove_adapter(_id, seq, prefix) _id = mm.groups()[0] seq = '' continue seq += ll remove_adapter(_id, seq, prefix) FASTA.close() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('file_fasta') parser.add_argument('seq_adapter') if len(sys.argv) != 3: print(usage) sys.exit(-1) args = parser.parse_args(sys.argv[1:3]) file_fasta = args.file_fasta seq_adapter = args.seq_adapter seq_test = "TCGTATGCCGTCTTCTGCTTGT" prefix = substr(seq_adapter, 0, 6) prefix = tr(prefix, '[acgtun.]', '[ACGTTNN]') remove_adapters(file_fasta, prefix) sys.exit(0)