def collect_contigs(dataset, output_dir, output_base, format):
    output = open(output_base + "." + format, "w")
    for barcode in dataset:
        file = os.path.join(output_dir, barcode.id, "truseq_long_reads." + format)
        if os.path.exists(file):
            contigs = SeqIO.parse(open(file), format)
            for contig in contigs:
                contig.id = barcode.id + "-" + contig.id
                SeqIO.write(contig, output, format)
    output.close()
Example No. 2
def Generate(input, output, numins, numdel):
    reference = list(input)
    result = "".join([ch.seq for ch in reference])
    l = sum([len(ch) for ch in reference])
    ins = GroupByChrom(GenerateInsertions(numins, result), reference)
    d = GroupByChrom(GenerateDeletions(numdel, result), reference)
    for ch_ins, ch_d, chrom in itertools.izip(ins, d, reference):
        sys.stdout.write("Chromosome " + chrom.id + "\n")
        rec = SeqIO.SeqRecord(Apply(chrom.seq, ch_ins, ch_d), chrom.id)
        SeqIO.write(rec, output, "fasta")
Example No. 3
def demultiplex(input_file, barcodes, extract_barcode):
    d = Demultiplexer(BarcodeWriterFactory(sys.argv[2]), barcodes,
                      extract_barcode)
    f = SeqIO.parse_fastq(SeqIO.Open(input_file, "r"))
    cnt = 0
    for record in f:
        cnt += 1
        d.write(record)
    d.close()
    f.close()
Example No. 4
def PrintResults(recs, reference, references_file, coordinates_file):
    aln = open(coordinates_file, "w")
    fasta = open(references_file, "w")
    for rec in recs:
        aln.write(str(rec) + "\n")
        sequence = reference[rec.rname][rec.left:rec.right]
        rec_id = str(rec.rname) + "_(" + str(rec.left) + "-" + str(rec.right)+")"
        SeqIO.write(SeqIO.SeqRecord(sequence, rec_id), fasta, "fasta")
    aln.close()
    fasta.close()
Example No. 6
def collect_contigs(dataset, barcodes_dir, output_base, format):
    output = open(output_base + "." + format, "w")
    for barcode in dataset:
        file = os.path.join(barcodes_dir, barcode.id,
                            "truseq_long_reads." + format)
        if os.path.exists(file):
            contigs = SeqIO.parse(open(file), format)
            for contig in contigs:
                contig.id = barcode.id + "-" + contig.id
                SeqIO.write(contig, output, format)
    output.close()
Example No. 7
def CollectBarcodes(f, extract_barcode):
    h = SeqIO.parse_fastq(SeqIO.Open(f, "r"))
    cnt = 0
    counter = dict()
    for record in h:
        barcode = extract_barcode(record)
        if barcode not in counter:
            counter[barcode] = 0
        counter[barcode] += 1
        cnt += 1
        if cnt == 100000:
            break
    return [barcode for barcode, count in counter.iteritems() if count >= 100]
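For reference, the counting loop above has a compact equivalent using collections.Counter and itertools.islice; a minimal sketch, assuming the same project-local SeqIO helpers:

import itertools
from collections import Counter

def CollectBarcodesCompact(f, extract_barcode):
    # Count barcodes over the first 100000 records, keep the frequent ones.
    records = SeqIO.parse_fastq(SeqIO.Open(f, "r"))
    counts = Counter(extract_barcode(rec)
                     for rec in itertools.islice(records, 100000))
    return [barcode for barcode, count in counts.items() if count >= 100]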
Example No. 8
def getAssemblyLength(assembly_fasta):
    """
    Return the total length of the assembly of the given assembly fasta file.
    If an abundance file (for metagenomic analysis) is specified, we have to
    multiply each contig's length by its nominal abundance.
    """
    assembly_length = 0.0
    pf = SeqIO.ParseFasta(assembly_fasta)
    tuple = pf.getRecord()
    while tuple is not None:
        #print contig_abundance[tuple[0].split(' ')[0]]
        assembly_length += contig_abundance[tuple[0].split(' ')[0]] * len(
            tuple[1])
        if debug_level > 0:
            sys.stderr.write('Contig ' + tuple[0].split(' ')[0] + ' running length: ' +
                             str(assembly_length) + '\n')
        tuple = pf.getRecord()
    """ DEPRECIATED: uses biopython
    handle = open(assembly_fasta, "rU")
    for record in SeqIO.parse(handle, "fasta") :
        assembly_length += len(record.seq)

    handle.close()
    """
    return assembly_length
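Example No. 9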
def break_contigs(contigs_file, sam_file, output_file):
    contigs = list(SeqIO.parse(open(contigs_file, "rU"), "fasta"))
    # sam = sam_parser.SamChain([sam_parser.Samfile(sam_file) for sam_file in sam_files])
    sam = sam_parser.Samfile(sam_file)
    # last two arguments: K, min0 stretch length to break
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    coverage_breaker.OutputBroken(output_file)
Example No. 10
def build_kmer_to_contig_index(assembly_filename, kmer_size):
    """
    Return a k-mer inverted index mapping {kmer: [(contig, position), ...]}, together with a dict of contig lengths.
    """

    # Kmer inverted index
    kmer_ii = defaultdict(list)

    # Contig lengths
    contig_lengths = defaultdict(int)

    pf = SeqIO.ParseFasta(assembly_filename)
    tuple = pf.getRecord()
    kmer_revcomp = None
    kmer = None
    while tuple is not None:

        for i in xrange(0, len(tuple[1]) - kmer_size + 1):
            kmer = tuple[1][i:i + kmer_size]
            kmer_revcomp = revcompl(kmer)
            if kmer < kmer_revcomp:
                kmer_ii[kmer].append((tuple[0], i + 1))
            else:
                kmer_ii[kmer_revcomp].append((tuple[0], i + 1))

        contig_lengths[tuple[0]] = len(tuple[1])
        tuple = pf.getRecord()

    return kmer_ii, contig_lengths
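This index, and the read-mapping examples further down, rely on a revcompl helper that never appears in the listing. A minimal sketch, assuming uppercase ACGT/N sequences:

_COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}

def revcompl(seq):
    # Reverse-complement of a plain nucleotide string.
    return ''.join(_COMPLEMENT[base] for base in reversed(seq))

Storing each k-mer under the lexicographically smaller of itself and its reverse complement, as the loop above does, keeps the index strand-independent.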
Example No. 11
 def test_lazy_load_chunks_nav(self):
     data = SeqIO.load("seqImage/12-55-58.276.seq",
                       lazy=True,
                       chunk_shape=(2, 2),
                       nav_shape=(4, 5))
     print(data)
     data.plot()
     assert isinstance(data, LazySignal2D)
Example No. 12
 def test_celeritas(self):
     import numpy as np
     import matplotlib.pyplot as plt
     data = SeqIO.load_celeritas(
         top='/media/hdd/home/PtNW_100fps_Top_16-32-11.473.seq',
         bottom='/media/hdd/home/PtNW_100fps_Bottom_16-32-11.508.seq')
     print("the data shape", np.shape(data.data))
     data.sum().plot()
     plt.show()
Example No. 13
    def test_celeritas_lazy(self):
        data = SeqIO.load_celeritas(
            top='/media/hdd/home/1000FPS SS7 200x200/top.seq',
            bottom='/media/hdd/home/1000FPS SS7 200x200/bottom.seq',
            xml_file='/media/hdd/home/1000FPS SS7 200x200/metadata.xml',
            metadata='/media/hdd/home/1000FPS SS7 200x200/metadata.metadata',
            lazy=True,
            nav_shape=(200, 200),
            chunk_shape=(10, 10))
        print(data.data)

        print(data.metadata)
        print(data.axes_manager)
Example No. 14
def build_read_kmers_index(reads_filename, kmer_ii, kmer_size):
    """
    Return two dictionaries.  One contains the counts of ambiguous k-mers, while the other
    contains the number of unique k-mers that map to a given contig.
    """

    ambiguous_kmer_counts = defaultdict(int)
    contig_counts = defaultdict(int)

    pf = SeqIO.ParseFastQ(reads_filename)
    #tuple = pf.getNextReadSeq()
    kmer = None
    contig = None
    contigs_containing_kmer = []
    unalignable_kmers = 0
    #total_abundance = 0
    num_reads = 0
    sum = 0
    for tuple in pf:

        # For each k-mer in the read...
        for i in xrange(0, len(tuple[1]) - kmer_size + 1):

            # ... find what contigs contain it.
            kmer = tuple[1][i:i + kmer_size]
            if kmer in kmer_ii or revcompl(kmer) in kmer_ii:
                if kmer not in kmer_ii:
                    kmer = revcompl(kmer)

                # collect the postings (contigs and positions) for this k-mer.
                contigs_containing_kmer = accumulate(kmer_ii[kmer])

                #print kmer +'\t',
                contigs_containing_kmer = list(contigs_containing_kmer)

                if len(contigs_containing_kmer) > 1:
                    ambiguous_kmer_counts[kmer] += 1
                else:
                    contig_counts[contigs_containing_kmer[0][0]] += 1
            else:
                unalignable_kmers += 1

        if num_reads % 100000 == 0:
            sys.stderr.write('Processed reads:\t' + str(num_reads) + '\r')

        sum += len(tuple[1])
        num_reads += 1

    return ambiguous_kmer_counts, contig_counts, sum / num_reads
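The read-indexing examples call an accumulate helper that is also missing from the listing. Judging by how its result is used (a list of (contig, position) postings, where more than one surviving entry marks the k-mer as ambiguous), a hypothetical stand-in could simply deduplicate the postings by contig:

def accumulate(postings):
    # Hypothetical helper (not in the original listing): keep one
    # (contig, position) posting per distinct contig.
    first_seen = {}
    for contig, position in postings:
        first_seen.setdefault(contig, (contig, position))
    return list(first_seen.values())

Example No. 15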
def main():
    # Params
    parser = argparse.ArgumentParser()
    parser.add_argument("--refseq_in", required=True, help="RefSeq fasta file", type=str)
    parser.add_argument("--tss_in", required=True, help="RefSeq fasta file", type=str)
    parser.add_argument("--wigs_in", required=True,
                        help="Term-Seq coverage file(s) (.wig), Must contain forward and reverse files", type=str)
    parser.add_argument("--gff_out", required=True, help="GFF output file name for terminators", type=str)
    parser.add_argument("--distance", required=True, help="Distance to look for terminator after a TSS", type=int)
    args = parser.parse_args()

    # ---------------------------
    print("Loading sequence file...")
    fasta_parsed = SeqIO.parse(glob.glob(args.refseq_in)[0], "fasta")
    wig_files = glob.glob(args.wigs_in)
    f_wigs_parsed, r_wigs_parsed = WM(wig_files, fasta_parsed).build_matrix()
    accession = ""

    # The following line is repeated due to the previous iterator exhaustion
    fasta_parsed = SeqIO.parse(glob.glob(args.refseq_in)[0], "fasta")
    for seq_record in fasta_parsed:
        f_seq_str = str(seq_record.seq)
        accession = seq_record.id
        print(f_wigs_parsed[accession].to_string())
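Because SeqIO.parse returns a one-shot iterator, the file really is parsed twice above. Materializing the records once avoids the second pass; a sketch, assuming WM accepts a list of records as well as an iterator:

# Parse once, then reuse the list for the matrix builder and the loop.
fasta_records = list(SeqIO.parse(glob.glob(args.refseq_in)[0], "fasta"))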
Example No. 16
def moleculo_postprocessing(contigs_file, output_file, sam_files, log):
    log.info("===== Starting postprocessing based on read alignment")
    log.info("Processing scaffolds from " + contigs_file)
    log.info("Using read alignments to break and filter scaffolds")
    contigs = list(SeqIO.parse(open(contigs_file, "rU"), "fasta"))
    sam = sam_parser.SamChain([sam_parser.Samfile(sam_file) for sam_file in sam_files])
    generate_quality.GenerateQuality(contigs, sam)
    pattern_filter = moleculo_filter_contigs.PatternContigFilter(contigs, sam, pattern, rc_pattern)
    length_filter = moleculo_filter_contigs.ContigLengthFilter(1500)
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    pattern_breaker = break_by_coverage.PatternBreaker(pattern, rc_pattern, 150)
    n_breaker = break_by_coverage.NBreaker(3)
    result = SplitAndFilter(contigs, coverage_breaker, length_filter, n_breaker, pattern_breaker, pattern_filter)
    OutputResults(output_file, "fasta", result)
    OutputResults(output_file, "fastq", result)
    log.info("===== Postprocessing finished. Results can be found in " + output_file + ".fastq")
Example No. 17
def assign_read_kmers_to_contigs(reads_filename, kmer_ii, kmer_size):
    """
    Given a set of reads and k-mer length, assign k-mer counts to the contigs.
    """

    contig_counts = defaultdict(int)

    pf = SeqIO.ParseFastQ(reads_filename)
    #tuple = pf.getNextReadSeq()
    kmer = None
    contig = None
    unalignable_kmers = 0
    num_reads = 0
    sum = 0
    for tuple in pf:
        #while tuple is not None:

        # For each k-mer in the read...
        for i in xrange(0, len(tuple[1]) - kmer_size + 1):

            # ... find what contigs contain it.
            kmer = tuple[1][i:i + kmer_size]
            if kmer in kmer_ii:
                # and randomly assign the count to one of the items.
                contig = random.choice(kmer_ii[kmer])[0]
                contig_counts[contig] += 1

            elif revcompl(kmer) in kmer_ii:
                contig = random.choice(kmer_ii[revcompl(kmer)])[0]
                contig_counts[contig] += 1

            else:
                unalignable_kmers += 1

        sum += len(tuple[1])
        num_reads += 1

    #    tuple = pf.getNextReadSeq()

    #print 'Unalignable k-mers:\t' + str(unalignable_kmers)
    return contig_counts, sum / num_reads
Example No. 19
 def find(self, reference_dir):
     files = [
         os.path.join(reference_dir, file)
         for file in os.listdir(reference_dir)
         if os.path.isfile(os.path.join(reference_dir, file))
         and file.endswith("fasta")
     ]
     for file in files:
         sys.stdout.write("Processing file " + file + "\n")
         for rec in SeqIO.parse_fasta(open(file, "r")):
             lines = filter(None, re.split("[\-()]", rec.id))
             #                print lines
             chr = lines[0][3:-1]
             left = int(lines[1])
             right = int(lines[2])
             #                print chr, left, right
             for variation in self.variations:
                 if variation.Intersect(chr, left, right):
                     sys.stdout.write(
                         "Segment chr_{0}_({1}-{2}) intersected with variation ({3}, {4}, {5}, {6}) \n"
                         .format(chr, left, right, variation.chr,
                                 variation.start, variation.end,
                                 variation.allele))
Example No. 20
 def test_lazy_load_chunks(self):
     data = SeqIO.load("seqImage/12-55-58.276.seq",
                       lazy=True,
                       chunk_shape=5)
     print(data.data)
     assert isinstance(data, LazySignal2D)
Example No. 21
import numpy
from Bio import SeqIO

clusters = []

with open(filename, "r") as datafile:
    records = list(SeqIO.parse(datafile, "fasta"))
    num_records = len(records)

linkage_matrix = numpy.zeros((num_records - 1, 4))


def init_clusters():
    global clusters
    clusters = [list(range(num_records))]


def split_cluster():
    max_c = None
    max_sum = -1
    max_i = -1
    max_j = -1

    for c in clusters:
        total = 0
        max_d = -1
        max_di = -1
        max_dj = -1
        for i in range(len(c)):
            for j in range(i + 1, len(c)):
                d = get_distance(c[i], c[j])
                if d > max_d:
                    max_d = d
                    max_di = c[i]
                    max_dj = c[j]
                total = total + d
        # The original snippet is truncated here; record the cluster with
        # the largest total distance and its most distant pair.
        if total > max_sum:
            max_sum = total
            max_c = c
            max_i = max_di
            max_j = max_dj
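The snippet also relies on a get_distance helper that is not part of the listing. A minimal sketch, assuming a Hamming-style distance between two records addressed by index:

def get_distance(i, j):
    # Hypothetical helper: mismatches over the shared length, plus the
    # length difference, between records i and j.
    a, b = str(records[i].seq), str(records[j].seq)
    n = min(len(a), len(b))
    return sum(1 for k in range(n) if a[k] != b[k]) + abs(len(a) - len(b))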
Example No. 22
def ReadReference(file):
    result = dict()
    for rec in SeqIO.parse_fasta(open(file, "r")):
        result[rec.id] = rec.seq
    return result
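ReadReference uses a project-local parse_fasta; with Biopython the same mapping is available via the stock SeqIO.to_dict helper (note it maps ids to full SeqRecord objects rather than bare sequences):

from Bio import SeqIO

def ReadReferenceBio(path):
    # Map record id -> SeqRecord using Biopython's built-in helper.
    return SeqIO.to_dict(SeqIO.parse(path, "fasta"))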
Example No. 23
                l += len(ins[i][1])
                last = ins[i][0]
            i += 1
        else:
            if last < d[j][0]:
                result.append(seq[last:d[j][0]])
                l += d[j][0] - last
                sys.stdout.write("Deletion: " + str(l) + " " + str(d[j][1]) +
                                 "\n")
                last = d[j][0] + d[j][1]
            j += 1
    result.append(seq[last:])
    return "".join(result)


def Generate(input, output, numins, numdel):
    reference = list(input)
    result = "".join([ch.seq for ch in reference])
    l = sum([len(ch) for ch in reference])
    ins = GroupByChrom(GenerateInsertions(numins, result), reference)
    d = GroupByChrom(GenerateDeletions(numdel, result), reference)
    for ch_ins, ch_d, chrom in itertools.izip(ins, d, reference):
        sys.stdout.write("Chromosome " + chrom.id + "\n")
        rec = SeqIO.SeqRecord(Apply(chrom.seq, ch_ins, ch_d), chrom.id)
        SeqIO.write(rec, output, "fasta")


if __name__ == '__main__':
    Generate(SeqIO.parse(open(sys.argv[1], "r"), "fasta"),
             open(sys.argv[2], "w"), int(sys.argv[3]), int(sys.argv[4]))
Example No. 24
 def test_load(self):
     data = SeqIO.load("seqImage/12-55-58.276.seq", nav_shape=[4, 5])
     print(data.axes_manager)
Example No. 25
 def test_load_upgrade(self, ans, lazy, shape):
     data = SeqIO.load_folder("testUpgrade/",
                              nav_shape=shape,
                              lazy=lazy)
     numpy.testing.assert_array_almost_equal(data.sum().data,
                                             ans)
Example No. 26
 def write(self, rec):
     SeqIO.write(rec, self.handlers[self.current_handler], "fastq")
     self.current_handler = 1 - self.current_handler
     self.count += 1
Example No. 27
 def ans(self):
     data = SeqIO.load_folder("testUpgrade/").sum()
     return data.data
Example No. 28
def main():
    if len(sys.argv) < 2:
        print USAGE
        sys.exit()

    parser = OptionParser()
    parser.add_option("-n", "--num_trials", dest="num_trials", default="1000")
    parser.add_option("-s",
                      "--sample_size",
                      dest="sample_size",
                      default="10000")
    parser.add_option("-i", "--input", dest="input", default=None)
    parser.add_option("-1", "--1", dest="first_mates")
    parser.add_option("-2", "--2", dest="second_mates")
    parser.add_option("-k", "--samples", dest="samples", default=0)
    parser.add_option("-o", "--output_dir", dest="output_dir", default="./")
    parser.add_option("-t", "--trials", dest="trials", default=0)
    parser.add_option("-d", "--debug_level", dest="debug_level", default=0)
    parser.set_usage(USAGE)
    (options, args) = parser.parse_args(sys.argv[1:])
    debug_level = int(options.debug_level)

    # Read through each reads, and add their respective input_number to sample_set.
    # [1 1 1 1 2 2 2 2 2 ... 6 6 6]
    # This way we can choose how many reads of what input file we should have based
    # on their abundances.
    # TODO(cmhill): Inefficient, but works fine for 100 million reads.
    total_read_set = []

    # We have to process the mates together in order.
    first_mate_files = options.first_mates.split(',')
    second_mate_files = options.second_mates.split(',')

    if len(first_mate_files) != len(second_mate_files):
        print "Error: Mate files need to have the same number."
        sys.exit(0)

    # Handle the option of multiple samples.
    for samples in options.samples.split(','):
        samples = int(samples)

        output_dir = options.output_dir + '/' + str(samples) + '/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Re-open all read files.
        first_mate_readers = []
        second_mate_readers = []

        for i in range(len(first_mate_files)):
            first_mate_readers.append(SeqIO.ParseFastQ(first_mate_files[i]))
            second_mate_readers.append(SeqIO.ParseFastQ(second_mate_files[i]))

        sample_reads_dict = {}
        sample_reads = []

        k = samples
        index = 0

        file_index = 0
        while file_index < len(first_mate_readers):

            second_mate = second_mate_readers[file_index].next()
            for first_mate in first_mate_readers[file_index]:
                index += 1

                # Reservoir sampling algorithm.
                if len(sample_reads) < k:
                    sample_reads.append(
                        (file_index, (first_mate, second_mate)))
                else:
                    r = random.randrange(index)
                    if r < k:
                        sample_reads[r] = (file_index, (first_mate,
                                                        second_mate))

                try:
                    second_mate = second_mate_readers[file_index].next()
                except StopIteration:
                    pass

            if debug_level > 0:
                print 'File Index: ' + str(file_index)
                print 'Reads needed: ' + str(k)
                print sample_reads

            file_index += 1

        # TODO(cmhill): Remove, since we print the reads out right away.
        sample_reads_dict[file_index] = sample_reads

        file_index = 0
        # Write out these sample reads to file.
        # Re-open all read files.
        first_mate_writers = []
        second_mate_writers = []

        for i in range(len(first_mate_files)):
            first_mate_writers.append(
                open(output_dir + '/' + str(file_index) + '_1.fastq', 'w'))
            second_mate_writers.append(
                open(output_dir + '/' + str(file_index) + '_2.fastq', 'w'))
            file_index += 1

        for reads in sample_reads:
            first_mate_writers[reads[0]].write('\n'.join(reads[1][0]) + '\n')
            second_mate_writers[reads[0]].write('\n'.join(reads[1][1]) + '\n')
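The inner loop above is reservoir sampling (Algorithm R): every read pair ends up in the k-sized sample with equal probability, regardless of how many pairs the files contain. Isolated as a minimal sketch:

import random

def reservoir_sample(stream, k):
    # Uniformly sample k items from an iterable of unknown length.
    sample = []
    for index, item in enumerate(stream, start=1):
        if len(sample) < k:
            sample.append(item)
        else:
            r = random.randrange(index)
            if r < k:
                sample[r] = item
    return sample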
Example No. 29
def assign_read_kmers_to_contigs_iterative(reads_filename, kmer_ii, kmer_size,
                                           contig_abundances):
    """
    Given a set of reads and k-mer length, assign k-mer counts to the contigs based on their abundances.
    """

    contig_counts = defaultdict(int)

    pf = SeqIO.ParseFastQ(reads_filename)
    #tuple = pf.getNextReadSeq()
    kmer = None
    contig = None
    contigs_containing_kmer = []
    unalignable_kmers = 0
    total_abundance = 0
    num_reads = 0
    sum = 0
    for tuple in pf:
        #while tuple is not None:

        # For each k-mer in the read...
        for i in xrange(0, len(tuple[1]) - kmer_size + 1):

            # ... find what contigs contain it.
            kmer = tuple[1][i:i + kmer_size]
            if kmer in kmer_ii or revcompl(kmer) in kmer_ii:
                if kmer not in kmer_ii:
                    kmer = revcompl(kmer)

                # and randomly assign the count to one of the items.
                contigs_containing_kmer = accumulate(kmer_ii[kmer])

                #print kmer +'\t',
                contigs_containing_kmer = list(contigs_containing_kmer)
                #print contigs_containing_kmer

                # Calculate total abundance
                for contig in contigs_containing_kmer:
                    total_abundance += contig_abundances[contig[0]]

                # Choose
                choice = random.randint(1, total_abundance)

                curr_abundance = 0
                chosen_contig_tuple = None
                for contig in contigs_containing_kmer:
                    curr_abundance += contig_abundances[contig[0]]

                    # Have we found the right contig?
                    if curr_abundance >= choice:
                        chosen_contig_tuple = contig
                        #print 'Selecting:\t',
                        #print chosen_contig_tuple
                        break

                contig_counts[chosen_contig_tuple[0]] += 1

                total_abundance = 0

            else:
                unalignable_kmers += 1

        sum += len(tuple[1])
        num_reads += 1

    return contig_counts, sum / num_reads
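The abundance-weighted pick in the middle of the loop is roulette-wheel selection. The same choice in isolation, as a sketch:

import random

def choose_by_abundance(postings, contig_abundances):
    # Pick one (contig, position) posting with probability proportional
    # to its contig's abundance.
    total = sum(contig_abundances[contig] for contig, _ in postings)
    pick = random.randint(1, total)
    running = 0
    for contig, position in postings:
        running += contig_abundances[contig]
        if running >= pick:
            return contig, position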
Example No. 30
def Xbuffer(row):
    # Pad qstart upstream by BUFFER, clamping at 0 (numpy.where or
    # pybedtools would be alternatives for this column-wise change).
    return max(row['qstart'] - BUFFER, 0)


def Ybuffer(row):
    # Pad qend downstream by BUFFER.
    return row['qend'] + BUFFER


df['qstart'] = df.apply(Xbuffer, axis=1)
df['qend'] = df.apply(Ybuffer, axis=1)

# re-rank blast file by e-value (sort_values returns a copy, so reassign)
df = df.sort_values(by=['evalue', 'bitscore'], ascending=[True, False])

# create a new file for each blasted TE
for record in SeqIO.parse(INPUT, "fasta"):
    record.id = re.sub(r'__(.*)___', r'#\1/', record.id)
    record.id = 'CONSENSUS' + record.id
    with open(TE + '.fas', "a+") as TEfile:
        TEfile.write('>' + record.id + '\n' + str(record.seq) + '\n')

        # add the top 40 blast hits to the new file
        n = 0
        for _, hit in df[df['qseqid'] == record.id].iterrows():
            if n >= 40:
                break
            TEfile.write(hit['sseqid'] + '\n')  # write the full hit record, not just its id
            n += 1

# align with muscle
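The closing comment points at a MUSCLE alignment step; a minimal sketch, assuming the classic MUSCLE v3 command line is on PATH:

import subprocess

# Hypothetical follow-up: align each per-TE fasta produced above.
subprocess.check_call(["muscle", "-in", TE + ".fas", "-out", TE + ".aln"])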
Example No. 31
def OutputResults(output_file, format, result):
    output = open(output_file + "." + format, "w")
    for contig in result:
        SeqIO.write(contig, output, format)
    output.close()
Example No. 33
 def test_load(self):
     data = SeqIO.load("seq4dSTEM/")
     print(data.axes_manager)
Example No. 34
 def OutputBroken(self, output_file):
     output = open(output_file, "w")
     for contig in self.contigs:
         for subcontig in self.Break(contig):
             SeqIO.write(subcontig, output, "fasta")
     output.close()