Example #1
def test_linear_search_three(bbdb_fullsize_prots):
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'NC')
    w = Vertex(bbs, 'N_')
    verts = (u, v, w)
    kw = dict(
        splice_max_rms=0.5,  # max allowed splice RMSD
        splice_ncontact_cut=30,  # min number of contacts for a valid splice
        splice_clash_d2=4.0**2,  # squared clash distance, CA atoms only
        splice_contact_d2=8.0**2,  # squared contact distance
        splice_rms_range=6,  # residue window used for the splice RMSD
        splice_clash_contact_range=60,  # residue range checked for clash/contact
        splice_clash_contact_by_helix=False)
    e = Edge(u, bbs, v, bbs, **kw)
    f = Edge(v, bbs, w, bbs, **kw)
    edges = (e, f)

    # print('------------- e ---------------')
    # _print_splices(e)
    # print('------------- f ---------------')
    # _print_splices(f)
    # print('------------- result ---------------')

    ssdag = SearchSpaceDag(None, (bbs, ) * 3, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)

    # from time import clock
    # t = clock()
    # for i in range(100):
    # grow_linear(verts, edges)
    # print('time 10', clock() - t)
    # assert 0

    assert np.allclose(result.pos[:, 0], np.eye(4))

    idx = _expand_inout_indices(verts, result.idx)
    isort = np.lexsort((idx[:, 3], idx[:, 2], idx[:, 1], idx[:, 0]))
    idx = idx[isort, :]
    assert len(idx) == _num_splices(e) * _num_splices(f)

    import sys
    np.set_printoptions(threshold=sys.maxsize)  # print the full array
    print(repr(idx))

    assert np.all(idx == [
        [0, 19, 0, 3], [0, 19, 0, 60], [0, 19, 16, 39], [0, 19, 17, 0],
        [0, 19, 17, 58], [0, 19, 18, 59], [0, 19, 22, 20], [0, 19, 22, 59],
        [0, 19, 23, 39], [0, 19, 23, 40], [0, 19, 23, 60], [17, 17, 0, 3],
        [17, 17, 0, 60], [17, 17, 16, 39], [17, 17, 17, 0], [17, 17, 17, 58],
        [17, 17, 18, 59], [17, 17, 22, 20], [17, 17, 22, 59], [17, 17, 23, 39],
        [17, 17, 23, 40], [17, 17, 23, 60], [18, 18, 0, 3], [18, 18, 0, 60],
        [18, 18, 16, 39], [18, 18, 17, 0], [18, 18, 17, 58], [18, 18, 18, 59],
        [18, 18, 22, 20], [18, 18, 22, 59], [18, 18, 23, 39], [18, 18, 23, 40],
        [18, 18, 23, 60], [22, 18, 0, 3], [22, 18, 0, 60], [22, 18, 16, 39],
        [22, 18, 17, 0], [22, 18, 17, 58], [22, 18, 18, 59], [22, 18, 22, 20],
        [22, 18, 22, 59], [22, 18, 23, 39], [22, 18, 23, 40], [22, 18, 23, 60],
        [23, 19, 0, 3], [23, 19, 0, 60], [23, 19, 16, 39], [23, 19, 17, 0],
        [23, 19, 17, 58], [23, 19, 18, 59], [23, 19, 22, 20], [23, 19, 22, 59],
        [23, 19, 23, 39], [23, 19, 23, 40], [23, 19, 23, 60]
    ])
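The helper `_expand_inout_indices` is not shown in these excerpts. Judging by the asserts on `inout` in the two-vertex test below, it splits each vertex index in `result.idx` into the (in, out) pair stored in that vertex's `inout` table, which is why `idx` has four columns for three vertices. A minimal sketch of that idea, assuming `inout` is an (N, 2) integer array; this is an illustration, not the project's actual helper:

import numpy as np

def expand_inout_indices_sketch(verts, idx):
    # Hypothetical re-implementation: terminal vertices keep their single
    # column; each internal vertex index is replaced by the (in, out)
    # pair from its inout table.
    cols = [idx[:, :1]]
    for j, v in enumerate(verts[1:-1], start=1):
        cols.append(v.inout[idx[:, j]])  # shape (nresults, 2)
    cols.append(idx[:, -1:])
    return np.concatenate(cols, axis=1)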
Example #2
def test_linear_search_two(bbdb_fullsize_prots):
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'N_')
    verts = (u, v)
    kw = dict(
        splice_max_rms=0.7,  # max allowed splice RMSD
        splice_ncontact_cut=30,  # min number of contacts for a valid splice
        splice_clash_d2=4.0**2,  # squared clash distance, CA atoms only
        splice_contact_d2=8.0**2,  # squared contact distance
        splice_rms_range=6,  # residue window used for the splice RMSD
        splice_clash_contact_range=60,  # residue range checked for clash/contact
        splice_clash_contact_by_helix=False)
    edges = (Edge(u, bbs, v, bbs, **kw), )

    assert np.all(u.inout[:, 1] == np.arange(u.len))
    assert np.all(v.inout[:, 0] == np.arange(v.len))

    ssdag = SearchSpaceDag(None, (bbs, ) * 2, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)
    assert np.allclose(result.pos[:, 0], np.eye(4))

    isort = np.lexsort((result.idx[:, 1], result.idx[:, 0]))
    sortidx = result.idx[isort, :]
    print(repr(sortidx))
    assert np.all(
        sortidx == [[0, 3], [0, 24], [0, 41], [0, 60], [1, 22], [1, 25],
                    [16, 3], [16, 39], [16, 40], [16, 57], [16, 60], [17, 0],
                    [17, 22], [17, 40], [17, 55], [17, 58], [18, 23], [18, 38],
                    [18, 55], [18, 59], [19, 24], [19, 41], [19, 56], [19, 60],
                    [20, 18], [20, 57], [21, 58], [22, 20], [22, 23], [22, 38],
                    [22, 39], [22, 59], [22, 60], [23, 24], [23, 39], [23, 40],
                    [23, 41], [23, 54], [23, 60]])
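np.lexsort treats its last key as the primary one, so `np.lexsort((result.idx[:, 1], result.idx[:, 0]))` sorts rows by column 0 first and column 1 second; the four-key sort in test_linear_search_three above follows the same convention. A self-contained demo:

import numpy as np

rows = np.array([[2, 1], [0, 9], [2, 0], [0, 3]])
order = np.lexsort((rows[:, 1], rows[:, 0]))  # last key = primary: column 0
print(rows[order])  # -> [[0 3] [0 9] [2 0] [2 1]]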
Example #3
def test_linear_search_two(bbdb_fullsize_prots):
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'N_')
    e = Edge(u, bbs, v, bbs)

    assert np.all(u.inout[:, 1] == np.arange(u.len))
    assert np.all(v.inout[:, 0] == np.arange(v.len))

    result = grow_linear((u, v), (e, ))
    assert np.allclose(result.positions[:, 0], np.eye(4))
    assert np.all(
        result.indices == [[0, 22], [18, 40], [19, 21], [19, 60], [21, 0],
                           [21, 58], [22, 1], [22, 57], [22, 59], [22, 60],
                           [23, 20], [23, 58], [23, 59], [23, 60]])  # yapf: disable
Example #4
def test_linear_search_three(bbdb_fullsize_prots):
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'NC')
    w = Vertex(bbs, 'N_')
    verts = (u, v, w)
    e = Edge(u, bbs, v, bbs)
    f = Edge(v, bbs, w, bbs)
    edges = (e, f)

    # print('------------- e ---------------')
    # _print_splices(e)
    # print('------------- f ---------------')
    # _print_splices(f)
    # print('------------- result ---------------')

    result = grow_linear(verts, edges)

    # from time import clock
    # t = clock()
    # for i in range(100):
    # grow_linear(verts, edges)
    # print('time 10', clock() - t)
    # assert 0

    idx = _expand_inout_indices(verts, result.indices)

    assert np.allclose(result.positions[:, 0], np.eye(4))

    assert len(idx) == _num_splices(e) * _num_splices(f)

    assert np.all(idx == [
        [19, 19, 0, 22], [19, 19, 18, 40], [19, 19, 19, 21], [19, 19, 19, 60],
        [19, 19, 21, 0], [19, 19, 21, 58], [19, 19, 22, 1], [19, 19, 22, 57],
        [19, 19, 22, 59], [19, 19, 22, 60], [19, 19, 23, 20], [19, 19, 23, 58],
        [19, 19, 23, 59], [19, 19, 23, 60], [21, 17, 0, 22], [21, 17, 18, 40],
        [21, 17, 19, 21], [21, 17, 19, 60], [21, 17, 21, 0], [21, 17, 21, 58],
        [21, 17, 22, 1], [21, 17, 22, 57], [21, 17, 22, 59], [21, 17, 22, 60],
        [21, 17, 23, 20], [21, 17, 23, 58], [21, 17, 23, 59], [21, 17, 23, 60],
        [22, 16, 0, 22], [22, 16, 18, 40], [22, 16, 19, 21], [22, 16, 19, 60],
        [22, 16, 21, 0], [22, 16, 21, 58], [22, 16, 22, 1], [22, 16, 22, 57],
        [22, 16, 22, 59], [22, 16, 22, 60], [22, 16, 23, 20], [22, 16, 23, 58],
        [22, 16, 23, 59], [22, 16, 23, 60], [22, 18, 0, 22], [22, 18, 18, 40],
        [22, 18, 19, 21], [22, 18, 19, 60], [22, 18, 21, 0], [22, 18, 21, 58],
        [22, 18, 22, 1], [22, 18, 22, 57], [22, 18, 22, 59], [22, 18, 22, 60],
        [22, 18, 23, 20], [22, 18, 23, 58], [22, 18, 23, 59], [22, 18, 23, 60],
        [22, 19, 0, 22], [22, 19, 18, 40], [22, 19, 19, 21], [22, 19, 19, 60],
        [22, 19, 21, 0], [22, 19, 21, 58], [22, 19, 22, 1], [22, 19, 22, 57],
        [22, 19, 22, 59], [22, 19, 22, 60], [22, 19, 23, 20], [22, 19, 23, 58],
        [22, 19, 23, 59], [22, 19, 23, 60], [23, 17, 0, 22], [23, 17, 18, 40],
        [23, 17, 19, 21], [23, 17, 19, 60], [23, 17, 21, 0], [23, 17, 21, 58],
        [23, 17, 22, 1], [23, 17, 22, 57], [23, 17, 22, 59], [23, 17, 22, 60],
        [23, 17, 23, 20], [23, 17, 23, 58], [23, 17, 23, 59], [23, 17, 23, 60],
        [23, 18, 0, 22], [23, 18, 18, 40], [23, 18, 19, 21], [23, 18, 19, 60],
        [23, 18, 21, 0], [23, 18, 21, 58], [23, 18, 22, 1], [23, 18, 22, 57],
        [23, 18, 22, 59], [23, 18, 22, 60], [23, 18, 23, 20], [23, 18, 23, 58],
        [23, 18, 23, 59], [23, 18, 23, 60], [23, 19, 0, 22], [23, 19, 18, 40],
        [23, 19, 19, 21], [23, 19, 19, 60], [23, 19, 21, 0], [23, 19, 21, 58],
        [23, 19, 22, 1], [23, 19, 22, 57], [23, 19, 22, 59], [23, 19, 22, 60],
        [23, 19, 23, 20], [23, 19, 23, 58], [23, 19, 23, 59], [23, 19, 23, 60]
    ])  # yapf: disable
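The `len(idx) == _num_splices(e) * _num_splices(f)` asserts rely on grow_linear keeping every pairing of a valid splice through e with a valid splice through f, so the expanded index table is the cartesian product of the two edges' splice pairs (visible above: each of the eight leading column pairs repeats the same fourteen trailing pairs). A small illustration with made-up (out, in) pairs:

from itertools import product

splices_e = [(19, 19), (21, 17), (22, 16)]  # hypothetical splices of edge e
splices_f = [(0, 22), (18, 40)]             # hypothetical splices of edge f
rows = [(eo, ei, fo, fi)
        for (eo, ei), (fo, fi) in product(splices_e, splices_f)]
assert len(rows) == len(splices_e) * len(splices_f)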
Example #5
def simple_search_dag(criteria,
                      db=None,
                      nbblocks=100,
                      min_seg_len=15,
                      parallel=False,
                      verbosity=0,
                      timing=0,
                      modbbs=None,
                      make_edges=True,
                      merge_bblock=None,
                      precache_splices=False,
                      precache_only=False,
                      bbs=None,
                      only_seg=None,
                      source=None,
                      print_edge_summary=False,
                      no_duplicate_bases=False,
                      shuffle_bblocks=False,
                      use_saved_bblocks=False,
                      output_prefix='./worms',
                      **kw):
    bbdb, spdb = db
    queries, directions = zip(*criteria.bbspec)
    tdb = time()
    if bbs is None:
        bbs = list()
        savename = output_prefix + '_bblocks.pickle'

        if use_saved_bblocks and os.path.exists(savename):
            with open(savename, 'rb') as inp:
                bbnames_list = _pickle.load(inp)
            for bbnames in bbnames_list:
                bbs.append([bbdb.bblock(n) for n in bbnames])

        else:
            for iquery, query in enumerate(queries):
                msegs = [
                    i + len(queries) if i < 0 else i
                    for i in criteria.which_mergeseg()
                ]
                if iquery in msegs[1:]:
                    print('seg', iquery, 'repeating bblocks from', msegs[0])
                    bbs.append(bbs[msegs[0]])
                    continue
                bbs0 = bbdb.query(
                    query,
                    max_bblocks=nbblocks,
                    shuffle_bblocks=shuffle_bblocks,
                    parallel=parallel,
                )
                bbs.append(bbs0)

        bases = [
            Counter(bytes(b.base).decode('utf-8') for b in bbs0)
            for bbs0 in bbs
        ]
        assert len(bbs) == len(queries)
        for i, v in enumerate(bbs):
            assert len(v) > 0, 'no bblocks for query: "' + queries[i] + '"'
        print('bblock queries:', str(queries))
        print('bblock numbers:', [len(b) for b in bbs])
        print('bblocks id:', [id(b) for b in bbs])
        print('bblock0 id ', [id(b[0]) for b in bbs])
        print('base_counts:')
        for query, basecount in zip(queries, bases):
            counts = ' '.join(f'{k}: {c}' for k, c in basecount.items())
            print(f'   {query:10}', counts)

        if criteria.is_cyclic:
            for a, b in zip(bbs[criteria.from_seg], bbs[criteria.to_seg]):
                assert a is b
            bbs[criteria.to_seg] = bbs[criteria.from_seg]

        if use_saved_bblocks:
            bbnames = [[bytes(b.file).decode('utf-8') for b in bb]
                       for bb in bbs]
            with open(savename, 'wb') as out:
                _pickle.dump(bbnames, out)

    else:
        bbs = bbs.copy()

    assert len(bbs) == len(criteria.bbspec)
    if modbbs: modbbs(bbs)
    if merge_bblock is not None and merge_bblock >= 0:
        # print('which_mergeseg', criteria.bbspec, criteria.which_mergeseg())
        for i in criteria.which_mergeseg():
            bbs[i] = (bbs[i][merge_bblock], )

    tdb = time() - tdb
    # info(
    # f'bblock creation time {tdb:7.3f} num bbs: ' +
    # str([len(x) for x in bbs])
    # )

    if precache_splices:
        bbnames = [[bytes(bb.file) for bb in bbtup] for bbtup in bbs]
        bbpairs = set()
        # for bb1, bb2, dirn1 in zip(bbnames, bbnames[1:], directions):
        for i in range(len(bbnames) - 1):
            bb1 = bbnames[i]
            bb2 = bbnames[i + 1]
            dirn1 = directions[i]
            rev = dirn1[1] == 'N'
            if bbs[i] is bbs[i + 1]:
                bbpairs.update((a, a) for a in bb1)
            else:
                bbpairs.update(
                    (b, a) if rev else (a, b) for a in bb1 for b in bb2)
        precompute_splicedb(db,
                            bbpairs,
                            verbosity=verbosity,
                            parallel=parallel,
                            **kw)
    if precache_only:
        return bbs

    verts = [None] * len(queries)
    edges = [None] * len(queries[1:])
    if source:
        srcdirn = [
            ''.join('NC_'[d] for d in source.verts[i].dirn)
            for i in range(len(source.verts))
        ]  # yapf: disable
        srcverts, srcedges = list(), list()
        for i, bb in enumerate(bbs):
            for isrc, bbsrc in enumerate(source.bbs):
                if directions[i] != srcdirn[isrc]: continue
                if [b.filehash for b in bb] == [b.filehash for b in bbsrc]:
                    verts[i] = source.verts[isrc]
                    srcverts.append(isrc)
        for i, bb in enumerate(zip(bbs, bbs[1:])):
            bb0, bb1 = bb
            for isrc, bbsrc in enumerate(zip(source.bbs, source.bbs[1:])):
                bbsrc0, bbsrc1 = bbsrc
                if directions[i] != srcdirn[isrc]: continue
                if directions[i + 1] != srcdirn[isrc + 1]: continue
                he = [b.filehash for b in bb0] == [b.filehash for b in bbsrc0]
                he &= [b.filehash for b in bb1] == [b.filehash for b in bbsrc1]
                if not he: continue
                edges[i] = source.edges[isrc]
                srcedges.append(isrc)

    if not make_edges: edges = []

    tvertex = time()
    exe = InProcessExecutor()

    if parallel:
        exe = cf.ThreadPoolExecutor(max_workers=parallel)
    with exe as pool:
        if only_seg is not None:
            save = bbs, directions
            bbs = [bbs[only_seg]]
            directions = [directions[only_seg]]
            verts = [verts[only_seg]]
        futures = list()
        for i, bb in enumerate(bbs):
            dirn = directions[i]
            if verts[i] is None:
                futures.append(
                    pool.submit(Vertex, bb, dirn, min_seg_len=min_seg_len))
        verts_new = [f.result() for f in futures]
        isnone = [i for i in range(len(verts)) if verts[i] is None]
        for i, inone in enumerate(isnone):
            verts[inone] = verts_new[i]
        # print(i, len(verts_new), len(verts))
        if isnone:
            assert i + 1 == len(verts_new)
        assert all(v for v in verts)
        if only_seg is not None:
            verts = ([None] * only_seg + verts + [None] *
                     (len(queries) - only_seg - 1))
            bbs, directions = save
    tvertex = time() - tvertex
    # info(
    # f'vertex creation time {tvertex:7.3f} num verts ' +
    # str([v.len if v else 0 for v in verts])
    # )

    tedge = 0  # defined even when make_edges is False, for the timing tuple below
    if make_edges:
        tedge = time()
        for i, e in enumerate(edges):
            if e is None:
                edges[i] = Edge(verts[i],
                                bbs[i],
                                verts[i + 1],
                                bbs[i + 1],
                                splicedb=spdb,
                                verbosity=verbosity,
                                precache_splices=precache_splices,
                                **kw)
        tedge = time() - tedge
        if print_edge_summary:
            _print_edge_summary(edges)
        # info(
        # f'edge creation time {tedge:7.3f} num splices ' +
        # str([e.total_allowed_splices()
        # for e in edges]) + ' num exits ' + str([e.len for e in edges])
        # )
        spdb.sync_to_disk()

    toret = SearchSpaceDag(criteria.bbspec, bbs, verts, edges)
    if timing:
        toret = toret, tdb, tvertex, tedge
    return toret
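For orientation, a hedged usage sketch of simple_search_dag; every name below is inferred from the signature and body above (criteria must expose bbspec, is_cyclic, and which_mergeseg(); db is a (bblock database, splice database) pair), not a verified API:

# Hypothetical call; extra splice_* keywords flow through **kw to Edge.
ssdag = simple_search_dag(
    criteria,
    db=(bbdb, spdb),
    nbblocks=64,
    parallel=4,              # becomes max_workers of the ThreadPoolExecutor
    precache_splices=True,
    splice_max_rms=0.7,      # forwarded to Edge via **kw
)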
Example #6
File: ssdag.py Project: willsheffler/worms
def simple_search_dag(
    criteria,
    db=None,
    nbblocks=[64],
    min_seg_len=15,
    parallel=False,
    verbosity=0,
    timing=0,
    modbbs=None,
    make_edges=True,
    merge_bblock=None,
    merge_segment=None,
    precache_splices=False,
    precache_only=False,
    bbs=None,
    bblock_ranges=[],
    only_seg=None,
    source=None,
    print_edge_summary=False,
    no_duplicate_bases=False,
    shuffle_bblocks=False,
    use_saved_bblocks=False,
    output_prefix="./worms",
    only_ivertex=[],
    **kw,
):
    bbdb, spdb = db
    queries, directions = zip(*criteria.bbspec)
    tdb = time()
    if bbs is None:
        bbs = list()
        savename = output_prefix + "_bblocks.pickle"

        if use_saved_bblocks and os.path.exists(savename):
            with open(savename, "rb") as inp:
                bbnames_list = _pickle.load(inp)
            # for i, l in enumerate(bbnames_list)
            # if len(l) >= nbblocks[i]:
            # assert 0, f"too many bblocks in {savename}"
            for i, bbnames in enumerate(bbnames_list):
                bbs.append([bbdb.bblock(n) for n in bbnames[:nbblocks[i]]])

        else:
            for iquery, query in enumerate(queries):
                if hasattr(criteria, "cloned_segments"):
                    msegs = [
                        i + len(queries) if i < 0 else i
                        for i in criteria.cloned_segments()
                    ]
                    if iquery in msegs[1:]:
                        print("seg", iquery, "repeating bblocks from",
                              msegs[0])
                        bbs.append(bbs[msegs[0]])
                        continue
                bbs0 = bbdb.query(
                    query,
                    max_bblocks=nbblocks[iquery],
                    shuffle_bblocks=shuffle_bblocks,
                    parallel=parallel,
                )
                bbs.append(bbs0)

            if bblock_ranges:
                bbs_sliced = list()
                assert len(bblock_ranges) == 2 * len(bbs)
                for ibb, bb in enumerate(bbs):
                    lb, ub = bblock_ranges[2 * ibb:2 * ibb + 2]
                    bbs_sliced.append(bb[lb:ub])
                bbs = bbs_sliced

            for ibb, bb in enumerate(bbs):
                print("bblocks", ibb)
                for b in bb:
                    print("   ", bytes(b.file).decode("utf-8"))

        bases = [
            Counter(bytes(b.base).decode("utf-8") for b in bbs0)
            for bbs0 in bbs
        ]
        assert len(bbs) == len(queries)
        for i, v in enumerate(bbs):
            assert len(v) > 0, 'no bblocks for query: "' + queries[i] + '"'
        print("bblock queries:", str(queries))
        print("bblock numbers:", [len(b) for b in bbs])
        print("bblocks id:", [id(b) for b in bbs])
        print("bblock0 id ", [id(b[0]) for b in bbs])
        print("base_counts:")
        for query, basecount in zip(queries, bases):
            counts = " ".join(f"{k}: {c}" for k, c in basecount.items())
            print(f"   {query:10}", counts)

        if criteria.is_cyclic:
            # for a, b in zip(bbs[criteria.from_seg], bbs[criteria.to_seg]):
            # assert a is b
            bbs[criteria.to_seg] = bbs[criteria.from_seg]

        if use_saved_bblocks and not os.path.exists(savename):
            bbnames = [[bytes(b.file).decode("utf-8") for b in bb]
                       for bb in bbs]
            with open(savename, "wb") as out:
                _pickle.dump(bbnames, out)

    else:
        bbs = bbs.copy()

    assert len(bbs) == len(criteria.bbspec)
    if modbbs:
        modbbs(bbs)

    if merge_bblock is not None and merge_bblock >= 0:
        # print('cloned_segments', criteria.bbspec, criteria.cloned_segments())
        if hasattr(criteria, "cloned_segments") and merge_segment is None:
            for i in criteria.cloned_segments():
                # print('   ', 'merge seg', i, 'merge_bblock', merge_bblock)
                bbs[i] = (bbs[i][merge_bblock], )
        else:
            if merge_segment is None:
                merge_segment = 0
            # print('   ', 'merge_segment not None')
            # print('   ', [len(b) for b in bbs])
            # print('   ', 'merge_segment', merge_segment)
            # print('   ', 'merge_bblock', merge_bblock, len(bbs[merge_segment]))
            bbs[merge_segment] = (bbs[merge_segment][merge_bblock], )

    tdb = time() - tdb
    # info(
    # f'bblock creation time {tdb:7.3f} num bbs: ' +
    # str([len(x) for x in bbs])
    # )

    if precache_splices:
        bbnames = [[bytes(bb.file) for bb in bbtup] for bbtup in bbs]
        bbpairs = set()
        # for bb1, bb2, dirn1 in zip(bbnames, bbnames[1:], directions):
        for i in range(len(bbnames) - 1):
            bb1 = bbnames[i]
            bb2 = bbnames[i + 1]
            dirn1 = directions[i]
            rev = dirn1[1] == "N"
            if bbs[i] is bbs[i + 1]:
                bbpairs.update((a, a) for a in bb1)
            else:
                bbpairs.update(
                    (b, a) if rev else (a, b) for a in bb1 for b in bb2)
        precompute_splicedb(db,
                            bbpairs,
                            verbosity=verbosity,
                            parallel=parallel,
                            **kw)
    if precache_only:
        return bbs

    verts = [None] * len(queries)
    edges = [None] * len(queries[1:])
    if source:
        srcdirn = [
            "".join("NC_"[d] for d in source.verts[i].dirn)
            for i in range(len(source.verts))
        ]  # yapf: disable
        srcverts, srcedges = list(), list()
        for i, bb in enumerate(bbs):
            for isrc, bbsrc in enumerate(source.bbs):

                # fragile code... detecting this way can be wrong
                # print(i, isrc, directions[i], srcdirn[isrc])
                if directions[i] != srcdirn[isrc]:
                    continue
                if [b.filehash for b in bb] == [b.filehash for b in bbsrc]:
                    # super hacky fix, really need to be passed info on what's what
                    if srcverts and srcverts[-1] + 1 != isrc:
                        continue
                    verts[i] = source.verts[isrc]
                    srcverts.append(isrc)

        for i, bb in enumerate(zip(bbs, bbs[1:])):
            bb0, bb1 = bb
            for isrc, bbsrc in enumerate(zip(source.bbs, source.bbs[1:])):
                bbsrc0, bbsrc1 = bbsrc
                if directions[i] != srcdirn[isrc]:
                    continue
                if directions[i + 1] != srcdirn[isrc + 1]:
                    continue
                he = [b.filehash for b in bb0] == [b.filehash for b in bbsrc0]
                he &= [b.filehash for b in bb1] == [b.filehash for b in bbsrc1]
                if not he:
                    continue
                edges[i] = source.edges[isrc]
                srcedges.append(isrc)

    if not make_edges:
        edges = []

    tvertex = time()
    exe = InProcessExecutor()

    if parallel:
        exe = cf.ThreadPoolExecutor(max_workers=parallel)
    with exe as pool:
        if only_seg is not None:
            save = bbs, directions
            bbs = [bbs[only_seg]]
            directions = [directions[only_seg]]
            verts = [verts[only_seg]]
        futures = list()
        for i, bb in enumerate(bbs):
            dirn = directions[i]
            if verts[i] is None:
                futures.append(
                    pool.submit(Vertex, bb, dirn, min_seg_len=min_seg_len))
        verts_new = [f.result() for f in futures]
        isnone = [i for i in range(len(verts)) if verts[i] is None]
        for i, inone in enumerate(isnone):
            verts[inone] = verts_new[i]
            if source:
                print('use new vertex', inone)
        if only_ivertex:
            # raise NotImplementedError
            print("!!!!!!! using one ivertex !!!!!", only_ivertex, len(verts),
                  [v.len for v in verts])
            if len(only_ivertex) != len(verts):
                print(
                    "NOT altering verts, len(only_ivertex)!=len(verts) continuing...",
                    "this is ok if part of a sub-protocol")
            else:
                for i, v in enumerate(verts):
                    if v.len > 1:  # could already have been "trimmed"
                        assert only_ivertex[i] < v.len
                        v.reduce_to_only_one_inplace(only_ivertex[i])
                    # print('x2exit', v.x2exit.shape)
                    # print('x2orig', v.x2orig.shape)
                    # print('ires', v.ires.shape)
                    # print('isite', v.isite.shape)
                    # print('ichain', v.ichain.shape)
                    # print('ibblock', v.ibblock.shape)
                    # print('inout', v.inout.shape, v.inout[10:])
                    # print('inbreaks', v.inbreaks.shape, v.inbreaks[10:])
                    # print('dirn', v.dirn.shape)
                    # # assert 0
        # print(i, len(verts_new), len(verts))
        if isnone:
            assert i + 1 == len(verts_new)
        assert all(v for v in verts)
        if only_seg is not None:
            verts = [None] * only_seg + verts + [None] * (len(queries) -
                                                          only_seg - 1)
            bbs, directions = save
    tvertex = time() - tvertex
    # info(
    # f'vertex creation time {tvertex:7.3f} num verts ' +
    # str([v.len if v else 0 for v in verts])
    # )

    tedge = 0  # defined even when make_edges is False, for the timing tuple below
    if make_edges:
        tedge = time()
        for i, e in enumerate(edges):
            if e is not None:
                continue
            edges[i], edge_analysis = Edge(
                verts[i],
                bbs[i],
                verts[i + 1],
                bbs[i + 1],
                splicedb=spdb,
                verbosity=verbosity,
                precache_splices=precache_splices,
                **kw,
            )
            allok = all(x[6] for x in edge_analysis)
            if allok:
                continue
            print("=" * 80)
            print("info for edges with no valid splices",
                  edges[i].total_allowed_splices())
            for tup in edge_analysis:
                iblk0, iblk1, ofst0, ofst1, ires0, ires1 = tup[:6]
                ok, f_clash, f_rms, f_ncontact, f_ncnh, f_nhc = tup[6:12]
                m_rms, m_ncontact, m_ncnh, m_nhc = tup[12:]
                if ok:
                    continue
                assert len(bbs[i + 0]) > iblk0
                assert len(bbs[i + 1]) > iblk1
                print("=" * 80)
                print("egde Bblock A", bytes(bbs[i][iblk0].file))
                print("egde Bblock B", bytes(bbs[i + 1][iblk1].file))
                print(
                    f"bb {iblk0:3} {iblk1:3}",
                    f"ofst {ofst0:4} {ofst1:4}",
                    f"resi {ires0.shape} {ires1.shape}",
                )
                print(
                    f"clash_ok {int(f_clash*100):3}%",
                    f"rms_ok {int(f_rms*100):3}%",
                    f"ncontact_ok {int(f_ncontact*100):3}%",
                    f"ncnh_ok {int(f_ncnh*100):3}%",
                    f"nhc_ok {int(f_nhc*100):3}%",
                )
                print(
                    f"min_rms {m_rms:7.3f}",
                    f"max_ncontact {m_ncontact:7.3f}",
                    f"max_ncnh {m_ncnh:7.3f}",
                    f"max_nhc {m_nhc:7.3f}",
                )
            print("=" * 80)
            fok = np.stack([x[7:12] for x in edge_analysis]).mean(axis=0)
            rmsmin = np.array([x[12] for x in edge_analysis]).min()
            fmx = np.stack([x[13:] for x in edge_analysis]).max(axis=0)
            print(f"{' SPLICE FAIL SUMMARY ':=^80}")
            print(f"splice clash ok               {int(fok[0]*100):3}%")
            print(f"splice rms ok                 {int(fok[1]*100):3}%")
            print(f"splice ncontacts ok           {int(fok[2]*100):3}%")
            print(f"splice ncontacts_no_helix ok  {int(fok[3]*100):3}%")
            print(f"splice nhelixcontacted ok     {int(fok[4]*100):3}%")
            print(f"min rms of any failing        {rmsmin}")
            print(
                f"max ncontact of any failing   {fmx[0]} (maybe large for non-5-helix splice)"
            )
            print(
                f"max ncontact_no_helix         {fmx[1]} (will be 999 for non-5-helix splice)"
            )
            print(
                f"max nhelix_contacted          {fmx[2]} (will be 999 for non-5-helix splice)"
            )
            print("=" * 80)
            assert edges[i].total_allowed_splices() > 0, "invalid splice"
        tedge = time() - tedge
        if print_edge_summary:
            _print_edge_summary(edges)
        # info(
        # f'edge creation time {tedge:7.3f} num splices ' +
        # str([e.total_allowed_splices()
        # for e in edges]) + ' num exits ' + str([e.len for e in edges])
        # )
        spdb.sync_to_disk()

    toret = SearchSpaceDag(criteria.bbspec, bbs, verts, edges)
    if timing:
        toret = toret, tdb, tvertex, tedge
    return toret
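The edge_analysis tuples unpacked above follow a fixed 16-field layout, sliced as tup[:6], tup[6:12], and tup[12:]. Purely to make that layout explicit (this type is an illustration, not part of the project), the fields could be named:

import numpy as np
from typing import NamedTuple

class SpliceAnalysis(NamedTuple):
    # tup[:6] -- block pair, offsets, candidate residue arrays
    iblk0: int; iblk1: int; ofst0: int; ofst1: int
    ires0: np.ndarray; ires1: np.ndarray
    # tup[6:12] -- overall flag plus per-filter pass fractions
    ok: bool; f_clash: float; f_rms: float
    f_ncontact: float; f_ncnh: float; f_nhc: float
    # tup[12:] -- min rms / max contact stats over the candidates
    m_rms: float; m_ncontact: float; m_ncnh: float; m_nhc: float

# e.g. analysis = [SpliceAnalysis(*tup) for tup in edge_analysis]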