Exemplo n.º 1
0
def get_chromosome_lengths(rerence_multifasta):
    ''' Get chromosome lengths.

    Iterates over the records yielded by sc_iter_fasta and maps each
    record's seq_gi attribute to its seq_length attribute. The resulting
    dictionary is printed and returned.

    @param rerence_multifasta: multifasta file with chromosomes
    @return: dictionary chr name -> chr length
    '''
    # Single-argument print(...) produces the same output under Python 2
    # and is also valid syntax under Python 3.
    print("Read reference genome")
    chrs = {}
    for seq_obj in sc_iter_fasta(rerence_multifasta):
        chrs[seq_obj.seq_gi] = seq_obj.seq_length
    print(chrs)
    # The docstring promises the mapping; previously nothing was returned.
    return chrs
Exemplo n.º 2
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#@created: 10.10.2013
#@author: Aleksey Komissarov
#@contact: [email protected]

import sys
from trseeker.seqio.fasta_file import sc_iter_fasta
import argparse

if __name__ == '__main__':
    # Split a multi-fasta file into one file per record. Each record is
    # written to "<output prefix>.<first word of the record header>.fa".

    parser = argparse.ArgumentParser(description='Parse multi fasta.')
    parser.add_argument('-i', '--input', help='Fasta input', required=True)
    parser.add_argument('-o', '--output', help='Output prefix', required=True)
    args = vars(parser.parse_args())

    fasta = args["input"]
    output = args["output"]

    # The record index was never used, so the records are iterated directly.
    for seq_obj in sc_iter_fasta(fasta, lower=False):
        # The first whitespace-separated token of the header names the file.
        name = seq_obj.header.split()[0]
        # Single-argument print(...) behaves identically on Python 2 and
        # is valid syntax on Python 3.
        print(name)
        with open("%s.%s.fa" % (output, name), "w") as fh:
            fh.write(seq_obj.fasta)
Exemplo n.º 3
0
    # Fragment of a larger routine whose header is outside this excerpt.
    # It reads options from `args`, then, for every record of `input_fasta`,
    # prints the position and frequency of each k-mer whose looked-up
    # frequency is positive, and waits for user input before the next record.
    jf_path = args["jf"]

    use_new = bool(args["new"])

    k = int(args["k"])
    # `c` is not referenced in the visible lines.
    c = int(args["cutoff"])

    # NOTE(review): `fh` is neither written to nor closed in the visible
    # lines -- confirm that it is used or closed further down.
    fh = open(args["output"], "w")

    # `jf_api` is tested before it is assigned here, so it must already be
    # bound before this excerpt.
    # NOTE(review): in the visible code `kmer2freq` is only assigned inside
    # this branch; unless it is also set earlier, the lookup in the loop below
    # raises NameError when the condition is false -- confirm.
    if jf_api and use_new:
        jf_api = jellyfish.QueryMerFile(jf_path)
        kmer2freq = Kmer2tfAPI(jf_api)


    # `input_fasta` is defined outside this excerpt; `sid` is unused here.
    for sid, seq_obj in enumerate(sc_iter_fasta(input_fasta)):
        print seq_obj.header, "Length:", seq_obj.length
        sequence = seq_obj.sequence.upper()
        n = len(sequence)
        # `kmers` is only referenced by the commented-out lines at the bottom.
        kmers = set()
        # Slide a window of length k over the upper-cased sequence.
        for i in xrange(n-k+1):
            kmer= sequence[i:i+k]
            tf = kmer2freq[kmer]
            # Report only k-mers with a positive frequency.
            if tf > 0:
                print i, tf
        # Block until the user presses Enter before the next record.
        raw_input("Next item?")


    #         kmers.add()
    #         kmers.add('A' + sequence[i:i+k-1])
    #         kmers.add('C' + sequence[i:i+k-1])
Exemplo n.º 4
0
    # Fragment of a larger routine; both its header and its tail are outside
    # this excerpt. For every k-mer of every record in the "gene_fasta" file
    # it looks the k-mer up in the loaded index and, when the lookup result is
    # truthy, collects the reads returned by get_reads_se_by_kmer.
    # NOTE(review): all paths below are hard-coded absolute paths.
    settings = {
        "index_prefix":
        "/mnt/guatemala/akomissarov/Boechera_spatifolia/raw.23.L3",
        "aindex_prefix":
        "/mnt/guatemala/akomissarov/Boechera_spatifolia/raw.23.L3",
        "reads_file":
        "/mnt/guatemala/akomissarov/Boechera_spatifolia/raw.reads",
        "gene_fasta": "/mnt/guatemala/akomissarov/Boechera_spatifolia/apr1.fa",
    }

    # presumably matches the "23" in the index prefix above -- TODO confirm
    k = 23
    index = load_aindex(settings)

    # (start, spring_pos) pairs of reads already collected; the same set is
    # also passed to get_reads_se_by_kmer on every call.
    used_reads = set()
    # `results` is not filled in the visible lines.
    results = []
    for seq_obj in sc_iter_fasta(settings["gene_fasta"]):

        # Slide a window of length k over the record's sequence.
        for i in xrange(seq_obj.length - k + 1):
            kmer = seq_obj.sequence[i:i + k]
            tf = index[kmer]
            # Skip k-mers for which the index lookup is falsy.
            if not tf:
                continue

            print i, kmer, tf

            hits = []
            for data in get_reads_se_by_kmer(kmer, index, used_reads):
                # Each item unpacks into seven fields; `next_read_start` is
                # not used in the visible lines.
                start, next_read_start, subread, pos, spring_pos, was_reversed, poses_in_read = data
                used_reads.add((start, spring_pos))
                # The second element of every hit is the constant 0.
                hits.append([pos, 0, subread, poses_in_read, was_reversed])
            if not hits:
                # (excerpt ends here; the body of this branch is not shown)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#@created: 10.10.2013
#@author: Aleksey Komissarov
#@contact: [email protected]

import sys
from trseeker.seqio.fasta_file import sc_iter_fasta
import argparse

if __name__ == '__main__':
    # Rewrite a fasta file so that every record header is replaced by the
    # record's zero-based index (">0", ">1", ...).

    # The previous description ('Check presence of adapter kmers.') did not
    # match what this script does.
    parser = argparse.ArgumentParser(description='Replace fasta headers with sequential indexes.')
    parser.add_argument('-i', '--input', help='Fasta input', required=True)
    parser.add_argument('-o', '--output', help='Fixed output', required=True)
    args = vars(parser.parse_args())

    fasta = args["input"]
    output = args["output"]

    with open(output, "w") as fh:
        for i, seq_obj in enumerate(sc_iter_fasta(fasta)):
            # One pre-formatted string keeps the Python 2 output
            # ("<i> fix <header>") and is valid syntax on Python 3.
            print("%s fix %s" % (i, seq_obj.seq_head))
            # Overwrite the header line, then write the record out.
            seq_obj.seq_head = ">%s\n" % i
            fh.write(seq_obj.fasta)