示例#1
0
#!/usr/bin/env python
import argparse
import os

from libkuleshov.stats import n50

##############################################################################

parser = argparse.ArgumentParser()

parser.add_argument('--contigs')

args = parser.parse_args()

##############################################################################

with os.popen("cat %s | grep '>'" % args.contigs) as contigs:
	lengths = list()
	for line in contigs:
		name, length, unk = line[1:].strip().split()
		lengths.append(int(length))

print 'N50:', n50(lengths)
示例#2
0
#                 and ctg_lengths[o[0]] > 700 and ctg_lengths[o[1]] > 700}

overlaps = {e for e,c in edge_counts.iteritems() if c >= 3}

validated_overlaps = overlaps & true_overlaps

print len(overlaps), len(validated_overlaps), len(true_overlaps)

# validated_overlaps = true_overlaps

# compute connected components
G = nx.Graph()
G.add_nodes_from(fasta.references)
for i, ovl1 in enumerate(validated_overlaps):
  if i % 1000 == 0: print '%d/%d' % (i, len(validated_overlaps))
  ol = list(ovl1)
  if len(ol) > 2:
    assert False
  elif len(ol) < 2:
    continue
  G.add_edge(ol[0],ol[1])
connected_components = nx.connected_components(G)

cc_list = [cc for cc in connected_components]

component_lengths = [sum([ctg_lengths[ctg] for ctg in component])
                     for component in cc_list]

print sum(component_lengths)
print n50(component_lengths)
示例#3
0
import os
import sys

from libkuleshov.stats import n50

print "cat %s | tr -d '\\n'  | sed -re 's/>ctg([0-9])+/\\n/g' |     perl -nle 'print length'" % sys.argv[
    1]
fasta_length_pipe = os.popen(
    "cat %s | tr -d '\\n'  | sed -re 's/>ctg([0-9])+/\\n/g' | perl -nle 'print length'"
    % sys.argv[1])

fasta_lengths = [int(line) for line in fasta_length_pipe]
total_len = sum(fasta_lengths)

print 'N50: %d (%d total)' % (n50(fasta_lengths), total_len)
示例#4
0
            assert False
        elif len(ol) < 2:
            continue
        G.add_edge(ol[0], ol[1])
    connected_components = nx.connected_components(G)

    cc_list = [cc for cc in connected_components]
    pickle.dump(cc_list, open('connected_components.pkl', 'wb'))

component_lengths = [
    sum([ctg_lengths[ctg] for ctg in component]) for component in cc_list
]

print sum(fasta.lengths)
print sum(component_lengths)
print n50(component_lengths)

# ----------------------------------------------------------------------------
# compute overlaps


def myalign(s1, s2):
    """Return the first alignment that align.globalms reports for s1 and s2.

    The call passes the numeric arguments 2, -1, -2, -0.2 and disables end-gap
    penalties.  NOTE(review): `align` is presumably Biopython's
    pairwise2.align, in which case the numbers are the match, mismatch,
    gap-open and gap-extend scores -- confirm against the file's imports.

    Raises IndexError if globalms returns no alignments.
    """
    match_score, mismatch_score = 2, -1
    gap_open, gap_extend = -2, -0.2
    alignments = align.globalms(s1, s2, match_score, mismatch_score,
                                gap_open, gap_extend,
                                penalize_end_gaps=False)
    return alignments[0]


def ctgs_overlap(ctg1, ctg2):
    head_seq1 = ctg_seq[ctg1][:100]
    tail_seq1 = ctg_seq[ctg1][-100:]
    head_seq2 = ctg_seq[ctg2][:100]
    tail_seq2 = ctg_seq[ctg2][-100:]
示例#5
0
import os
import sys

from libkuleshov.stats import n50

print "cat %s | tr -d '\\n'  | sed -re 's/>ctg([0-9])+/\\n/g' |     perl -nle 'print length'" % sys.argv[1]
fasta_length_pipe = os.popen("cat %s | tr -d '\\n'  | sed -re 's/>ctg([0-9])+/\\n/g' | perl -nle 'print length'" % sys.argv[1])

fasta_lengths = [int(line) for line in fasta_length_pipe]
total_len = sum(fasta_lengths)

print 'N50: %d (%d total)' % (n50(fasta_lengths), total_len)
示例#6
0
#!/usr/bin/env python
import argparse

from libkuleshov.stats import n50
from libkuleshov.fastx import read_bed

##############################################################################

parser = argparse.ArgumentParser()

parser.add_argument('--contigs')

args = parser.parse_args()

##############################################################################

bed = read_bed(args.contigs)

lengths = [(end - start + 1) for ctg in bed.keys()
           for (start, end) in bed[ctg]]
print 'Contigs:', len(lengths)
print 'Average length:', float(sum(lengths)) / len(lengths)
print 'N50 length:', n50(lengths)
示例#7
0
#!/usr/bin/env python
import argparse
import os

from libkuleshov.stats import n50

##############################################################################

parser = argparse.ArgumentParser()

parser.add_argument('--contigs')

args = parser.parse_args()

##############################################################################

with os.popen("cat %s | grep '>'" % args.contigs) as contigs:
    lengths = list()
    for line in contigs:
        name, length, unk = line[1:].strip().split()
        lengths.append(int(length))

print 'N50:', n50(lengths)
示例#8
0
#!/usr/bin/env python
import argparse

from libkuleshov.stats import n50
from libkuleshov.fastx import read_bed

##############################################################################

parser = argparse.ArgumentParser()

parser.add_argument('--contigs')

args = parser.parse_args()

##############################################################################

bed = read_bed(args.contigs)

lengths = [(end - start + 1) for ctg in bed.keys() for (start, end) in bed[ctg]]
print 'Contigs:', len(lengths)
print 'Average length:', float(sum(lengths)) / len(lengths)
print 'N50 length:', n50(lengths)