def excise_position(db_seq, db_lng, excise_beg, excise_end):
    '''
    excise the stretch of db_seq between two positions, after clamping
    the positions to the range 1..db_lng

    db_seq:     sequence to cut from
    db_lng:     upper limit applied to excise_end (the caller supplies it;
                presumably the length of db_seq -- confirm against callers)
    excise_beg: requested start position, raised to 1 if smaller
    excise_end: requested end position, lowered to db_lng if larger

    returns whatever substr yields for the clamped window
    '''
    # NOTE(review): max2/min2 are helpers defined elsewhere; presumably the
    # two-argument maximum and minimum.
    first = max2(1, excise_beg)
    last = min2(db_lng, excise_end)

    # the window is inclusive on both ends, hence the "+ 1"; the start is
    # shifted down by one before being handed to substr as its offset
    return substr(db_seq, first - 1, last - first + 1)
Exemplo n.º 2
0
def remove_adapter(_id, seq, prefix):
    '''
    print one fasta record (">id" line followed by a sequence line) in which
    the sequence has been clipped at the adapter prefix

    _id:    identifier written on the header line
    seq:    read sequence; it is first passed through tr with the
            '[acgtun.]' -> '[ACGTTNN]' mapping
    prefix: adapter prefix, interpolated verbatim into the regular
            expressions below

    the clipped sequence is chosen as follows:
      1. if word characters followed by the full prefix occur in seq, the
         word characters in front of the prefix are used
      2. otherwise, if substr(seq, 0, 6) equals the prefix, the prefix
         itself is used
      3. otherwise the prefix is shortened one character at a time from the
         right and the first shortened form found at the very end of seq
         wins; the word characters in front of it are used
    if none of these gives a non-empty result, the whole sequence is printed
    '''
    seq = tr(seq, '[acgtun.]', '[ACGTTNN]')
    clipped = None

    full_hit = re.search(r'(\w+)' + prefix, seq)
    if full_hit:
        clipped = full_hit.group(1)
    elif substr(seq, 0, 6) == prefix:
        clipped = prefix
    else:
        # equivalent of repeatedly chopping the last character off the
        # prefix; the shortened prefix must be anchored at the end of seq
        while len(prefix) > 0:
            prefix = prefix[:-1]
            tail_hit = re.search(r'(\w+)' + prefix + '$', seq)
            if tail_hit:
                clipped = tail_hit.group(1)
                break

    # fall back to the unclipped sequence when nothing usable was found
    print('>{}\n{}'.format(_id, clipped if clipped else seq))
Exemplo n.º 3
0
def make_dir_tmp(pref, MAP):
    '''
    create a fresh working directory in the current directory and return
    its name

    pref: text inserted directly after "dir_mapper" in the directory name
    MAP:  object with a write() method; it receives a "timestamp:" line

    the name has the form dir_mapper<pref>_<random part>_<timestamp>, with
    the timestamp formatted as day_month_year_hour_minute_second
    '''
    stamp = datetime.datetime.now().strftime('%d_%m_%y_%H_%M_%S')
    MAP.write('\ntimestamp:\t{}\n\n'.format(stamp))

    # a piece of the textual form of a random float, starting at offset 2
    # (i.e. after the leading "0." of an ordinary decimal rendering), makes
    # the name differ between runs that share the same timestamp
    random_part = substr(str(random.random()), 2, 10)

    dir_name = 'dir_mapper{}_{}_{}'.format(pref, random_part, stamp)
    os.mkdir(dir_name)
    return dir_name
def excise_struct(struct, beg, end, strand):
    '''
    excise the sub structure between positions beg and end (1-indexed,
    both inclusive) from struct

    struct: structure string to cut from
    beg:    1-indexed start position
    end:    1-indexed end position; may equal beg
    strand: not used by this function

    returns the excised piece as produced by substr, or 0 when beg lies
    beyond the end of struct

    when beg is greater than end, a message naming the module-level db_old
    is written through print_stderr and the process exits with status 1
    '''
    # begin can be equal to end if only one nucleotide is excised
    if beg > end:
        print_stderr(
            'begin can not be greater than end for {}\n'.format(db_old))
        # an invalid range is a failure, so report it with a non-zero exit
        # status instead of signalling success to the calling process
        sys.exit(1)

    # rarely, permuted combinations of signature and structure cause out of
    # bound excision errors.
    # this happens once appr. every two thousand combinations
    if beg > len(struct):
        return 0

    # the blast parsed format is 1-indexed, substr is 0-indexed
    return substr(struct, beg - 1, end - beg + 1)
def excise_seq(seq, beg, end, strand):
    '''
    excise sub sequence from the potential precursor

    seq:    sequence to cut from
    beg:    1-indexed start position
    end:    1-indexed end position (inclusive); may equal beg
    strand: "-" requests the reverse complement of the excised piece; any
            other value returns the piece as excised

    returns the excised piece (passed through revcom for the minus strand),
    or 0 when beg lies beyond the end of seq

    when beg is greater than end, a message naming the module-level db_old
    is written through print_stderr and the process exits with status 1
    '''
    # begin can be equal to end if only one nucleotide is excised
    if beg > end:
        print_stderr(
            'begin can not be greater than end for {}\n'.format(db_old))
        # an invalid range is a failure, so report it with a non-zero exit
        # status instead of signalling success to the calling process
        sys.exit(1)

    # rarely, permuted combinations of signature and structure cause out of
    # bound excision errors.
    # this happens once appr. every two thousand combinations
    if beg > len(seq):
        return 0

    # the blast parsed format is 1-indexed, substr is 0-indexed
    sub_seq = substr(seq, beg - 1, end - beg + 1)

    # if on the minus strand, the reverse complement should be returned
    if strand == "-":
        return revcom(sub_seq)

    return sub_seq
Exemplo n.º 6
0
                if mm:
                    remove_adapter(_id, seq, prefix)
                    _id = mm.groups()[0]
                    seq = ''
                    continue

                seq += ll

    remove_adapter(_id, seq, prefix)
    FASTA.close()


if __name__ == '__main__':
    # script entry point: expects exactly two command-line arguments, the
    # fasta file and the adapter sequence; any other argument count prints
    # the usage text and stops via sys.exit(-1)
    if len(sys.argv) != 3:
        print(usage)
        sys.exit(-1)

    parser = argparse.ArgumentParser()
    parser.add_argument('file_fasta')
    parser.add_argument('seq_adapter')
    args = parser.parse_args(sys.argv[1:])

    file_fasta, seq_adapter = args.file_fasta, args.seq_adapter
    # NOTE(review): seq_test is not referenced anywhere in the visible code;
    # kept because other parts of the file may read it.
    seq_test = "TCGTATGCCGTCTTCTGCTTGT"

    # the adapter is cut with substr(seq_adapter, 0, 6) and run through the
    # same tr mapping that remove_adapter applies to the reads
    prefix = tr(substr(seq_adapter, 0, 6), '[acgtun.]', '[ACGTTNN]')
    remove_adapters(file_fasta, prefix)
    sys.exit(0)