示例#1
0
def run_tests(names, num_items, num_its, type_=int):
    """Benchmark the requested container implementations.

    Every entry of ``names`` selects a zero-argument factory that builds an
    empty container.  The factory is passed to ``_run_test`` together with
    ``type_``, ``num_items`` and ``num_its``.

    Returns a dict mapping each requested name to the value returned by
    ``_run_test`` for it.  A name without a registered factory raises
    ``KeyError``.
    """
    # The factories stay lambdas so that a library is only touched when the
    # corresponding name is actually requested.
    factories = {
        'btrees': lambda: BTrees.OOBTree.OOBTree(),
        'blist': lambda: blist.sorteddict(),
        'bintrees': lambda: bintrees.FastRBTree(),
        'dict': lambda: dict(),
        # banyan containers that are told the key type up front.
        'banyan_red_black_tree': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.RED_BLACK_TREE),
        'banyan_red_black_tree_rank_updator': lambda: banyan.SortedDict(
            key_type=type_,
            alg=banyan.RED_BLACK_TREE,
            updator=banyan.RankUpdator),
        'banyan_red_black_tree_min_max_updator': lambda: banyan.SortedDict(
            key_type=type_,
            alg=banyan.RED_BLACK_TREE,
            updator=banyan.MinMaxUpdator),
        'banyan_splay_tree': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.SORTED_LIST),
        # "_gen" variants: same algorithms, no key_type given.
        'banyan_red_black_tree_gen': lambda: banyan.SortedDict(
            alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree_gen': lambda: banyan.SortedDict(
            alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda: banyan.SortedDict(
            alg=banyan.SORTED_LIST),
    }
    return {
        name: _run_test(factories[name], type_, num_items, num_its)
        for name in names
    }
def construct_intervals(mesh, zhat):
    """Index the triangles of ``mesh`` by their extent along ``zhat``.

    ``mesh`` unpacks into ``(points, triangles)``; each triangle unpacks into
    three indices into ``points``.  Every corner is projected with
    ``np.dot(zhat, point)`` and the triangle is stored under the key
    ``(lowest projection, highest projection)`` with its position in
    ``triangles`` as the value.

    Returns the ``banyan.SortedDict`` (float-pair keys, configured with
    ``banyan.OverlappingIntervalsUpdator``) holding those entries.
    """
    points, triangles = mesh

    by_extent = banyan.SortedDict([], key_type = (float,float), updator = banyan.OverlappingIntervalsUpdator)

    for tri_index, (a, b, c) in enumerate(triangles):
        projections = [np.dot(zhat, points[corner]) for corner in (a, b, c)]
        # NOTE(review): two triangles with an identical (min, max) pair share
        # a key, so the later one replaces the earlier one.
        by_extent[(min(projections), max(projections))] = tri_index

    return by_extent
示例#3
0
def run_tests(names, num_items, num_its):
    """Benchmark building each requested container from a sequence of keys.

    Every entry of ``names`` selects a one-argument factory.  Given an
    iterable ``es`` the factory builds the container from ``(e, 1)`` pairs.
    The factory is passed to ``_run_test`` together with ``int``,
    ``num_items`` and ``num_its``.

    Returns a dict mapping each requested name to the value returned by
    ``_run_test`` for it.  A name without a registered factory raises
    ``KeyError``.
    """
    # TODO (original author's note): give the key-typed variants separate
    # labels.
    # NOTE(review): here the "_gen" entries are the ones that pass
    # key_type=int - confirm the labels are the intended way round.
    fns = {
        'btrees': lambda es: BTrees.OOBTree.OOBTree([(e, 1) for e in es]),
        'blist': lambda es: blist.sorteddict([(e, 1) for e in es]),
        'bintrees': lambda es: bintrees.FastRBTree([(e, 1) for e in es]),
        'set': lambda es: set([(e, 1) for e in es]),
        'banyan_red_black_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SORTED_LIST),
        'banyan_red_black_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.RED_BLACK_TREE),
        # Fixed: the class name was misspelled here, so selecting this entry
        # raised AttributeError.
        'banyan_splay_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SORTED_LIST),
    }
    return {
        name: _run_test(fns[name], int, num_items, num_its)
        for name in names
    }
示例#4
0
                        if ((precise) and
                            (call['RV'] >= 2)) or ((not precise) and
                                                   (call['DV'] >= 2)):
                            gqAlt.append(call['GQ'])
            genotypeRatio = float(len(gqAlt) + len(gqRef)) / float(
                len(record.samples))
            if genotypeRatio > ratioGeno:
                if (len(gqRef)) and (len(gqAlt)) and (
                        numpy.median(gqRef) >=
                        gqRefCut) and (numpy.median(gqAlt) >= gqAltCut):
                    if (numpy.percentile(ratioRef, 99)
                            == 0) and (numpy.median(ratioAlt) >= altAF):
                        #print(record.INFO['END']-record.POS, len(gqRef), len(gqAlt), numpy.median(gqRef), numpy.median(gqAlt), numpy.percentile(ratioRef, 99), numpy.median(ratioAlt), genotypeRatio, sep="\t")
                        if not sv.has_key(record.CHROM):
                            sv[record.CHROM] = banyan.SortedDict(
                                key_type=(int, int),
                                alg=banyan.RED_BLACK_TREE,
                                updator=banyan.OverlappingIntervalsUpdator)
                        if (record.POS,
                                record.INFO['END']) not in sv[record.CHROM]:
                            sv[record.CHROM][(record.POS,
                                              record.INFO['END'])] = (
                                                  record.ID, record.INFO['PE'],
                                                  record.INFO['CT'])
                        else:
                            svDups[(record.CHROM, record.POS,
                                    record.INFO['END'])].append(
                                        (record.ID, record.INFO['PE'],
                                         record.INFO['CT']))

# Output vcf records
if args.vcfFile:
示例#5
0
 def __init__(self):
     """Create ``self.ports``, a mapping that builds interval trees on demand.

     Looking up a missing key in ``self.ports`` creates and stores a new
     ``banyan.SortedDict`` keyed by ``(float, float)`` pairs and configured
     with ``banyan.OverlappingIntervalsUpdator``.
     """
     def _new_interval_tree():
         return banyan.SortedDict(
             key_type=(float, float),
             updator=banyan.OverlappingIntervalsUpdator)

     self.ports = collections.defaultdict(_new_interval_tree)
示例#6
0
                    metavar='out.vcf',
                    required=True,
                    dest='outVCF',
                    help='output vcf file (required)')
args = parser.parse_args()

# Compute all stretches of Ns in the reference.
# nRun maps a sequence name to an interval tree of its N-runs;
# refLen maps a sequence name to its length.
nRun = dict()
refLen = dict()
# One or more consecutive N/n characters; compiled once instead of once per
# sequence.
nRunPattern = re.compile("([Nn]+)")
# NOTE(review): f_in is not closed anywhere in the visible code - confirm it
# is released later in the script.
f_in = gzip.open(args.ref) if args.ref.endswith('.gz') else open(args.ref)
for seqName, seqNuc, seqQuals in readfq(f_in):
    refLen[seqName] = len(seqNuc)
    print("Processing", seqName, refLen[seqName])
    # `in` behaves the same on Python 2 and 3; dict.has_key() no longer
    # exists on Python 3.
    if seqName not in nRun:
        nRun[seqName] = banyan.SortedDict(
            key_type=(int, int),
            alg=banyan.RED_BLACK_TREE,
            updator=banyan.OverlappingIntervalsUpdator)
    for m in nRunPattern.finditer(seqNuc):
        # m.span() is the (start, end) offset pair of the run within seqNuc.
        nRun[seqName][m.span()] = 1

# Add read-depth control region to VCF file
if args.vcfFile:
    vcf_reader = vcf.Reader(
        open(args.vcfFile), 'r',
        compressed=True) if args.vcfFile.endswith('.gz') else vcf.Reader(
            open(args.vcfFile), 'r', compressed=False)
    if 'CONTROL' not in vcf_reader.infos.keys():
        vcf_reader.infos['CONTROL'] = vcf.parser._Info('CONTROL', 1, 'Integer',
                                                       'Control variant.')
    vcf_writer = vcf.Writer(open(args.outVCF, 'w'),
                            vcf_reader,