示例#1
0
def getComponents(matches, max_distance=0, min_overlap=0, by_query=False):
    """Group matches into connected components of overlapping/nearby matches.

    Every match starts out as its own component (linked to itself); two
    matches are then linked whenever their alignments satisfy the selected
    criterion. Exactly one criterion is active per call:

    max_distance
       allow reads to be joined if they are # residues apart.
       Adjacent reads are 1 residue apart, overlapping reads are 0 residues
       apart. This criterion is used whenever ``min_overlap`` is not > 0,
       i.e. also for the defaults (distance threshold 0).
    min_overlap
       require at least # residues to be overlapping. Used when > 0.
    by_query
       if true, compare the ``mMapQuery2Target`` alignment of each match,
       otherwise ``mMapTarget2Query``. The flag is also forwarded to
       ``addAlignments``.

    Raises ValueError if both ``min_overlap`` and ``max_distance`` are > 0.
    The check is done before ``matches`` is touched, so an invalid call has
    no side effects.

    Returns the result of ``Components.IComponents.getComponents()``; nodes
    are the integer indices of ``matches``.
    """

    # Validate first: fail fast, before addAlignments() works on `matches`.
    if min_overlap > 0 and max_distance > 0:
        raise ValueError(
            "both min_overlap (%i) and max_distance (%i) > 0" % (min_overlap, max_distance))

    # NOTE(review): presumably this attaches the mMap* alignment attributes
    # read below -- confirm against addAlignments().
    addAlignments(matches, by_query=by_query)

    components = Components.IComponents()

    # Self links make sure that unlinked matches still show up as
    # single-member components.
    for x in range(len(matches)):
        components.add(x, x)

    # Name of the per-match alignment attribute that is compared.
    attribute = "mMapQuery2Target" if by_query else "mMapTarget2Query"

    if min_overlap > 0:
        def is_linked(map_a, map_b):
            return alignlib_lite.py_getAlignmentOverlap(
                map_a, map_b, alignlib_lite.py_RR) >= min_overlap
    else:
        def is_linked(map_a, map_b):
            return alignlib_lite.py_getAlignmentShortestDistance(
                map_a, map_b, alignlib_lite.py_RR) <= max_distance

    # All-against-all comparison over unordered pairs (y < x). Attributes are
    # looked up only when a pair is actually compared.
    for x in range(len(matches)):
        for y in range(x):
            if is_linked(getattr(matches[x], attribute),
                         getattr(matches[y], attribute)):
                components.add(x, y)

    return components.getComponents()
示例#2
0
def MapTranscripts2Genes(transcripts, map_transcript2location):
    """Merge overlapping transcripts into genes.

    Two transcripts are linked when ``map_transcript2location`` places them
    on the same token and strand and their coordinate ranges share a
    positive-length overlap. Each connected component of linked transcripts
    becomes one gene.

    The gene identifier is the first member of the component as returned by
    ``getComponents()``, so which transcript names the gene is arbitrary.

    transcripts
       iterable of objects with ``mTranscript``, ``mSchema`` and ``mQuality``.
    map_transcript2location
       mapping of transcript id to ``(token, strand, from, to)``.

    Returns a tuple ``(new_transcripts, new_genes)``: a list of newly built
    ``Orthologs.Transcript`` objects and a dictionary mapping each gene id
    to the list of its transcripts.
    """

    graph = Components.SComponents()

    info_by_transcript = {}
    seen = set()
    for transcript in transcripts:
        tid = transcript.mTranscript
        info_by_transcript[tid] = (transcript.mSchema, transcript.mQuality)
        token, strand, start, end = map_transcript2location[tid]

        # a self link keeps transcripts without any overlap in the graph
        graph.add(tid, tid)

        # link to every previously seen transcript that overlaps this one
        for other in seen:
            other_token, other_strand, other_start, other_end = \
                map_transcript2location[other]
            if token != other_token or strand != other_strand:
                continue
            if min(end, other_end) - max(start, other_start) > 0:
                graph.add(tid, other)

        seen.add(tid)

    new_genes = {}
    new_transcripts = []
    for component in graph.getComponents():
        gene = component[0]
        members = []
        new_genes[gene] = members
        for tid in component:
            schema, quality = info_by_transcript[tid]
            entry = Orthologs.Transcript()
            entry.mSchema = schema
            entry.mTranscript = tid
            entry.mGene = gene
            entry.mQuality = quality
            members.append(entry)
            new_transcripts.append(entry)

    return new_transcripts, new_genes
示例#3
0
import CGAT.Components as Components

# Demo of the Components containers.
#
# print is always called with a single pre-formatted argument so that the
# script produces the same output under Python 2 (print statement) and
# Python 3 (print function).

# --- components over string node ids ---
c = Components.SComponents()

links = (("1", "2"), ("1", "3"), ("2", "3"), ("3", "3"), ("4", "5"), ("6",
                                                                      "6"))

# show the return value of add() for every link
for a, b in links:
    print("%s %s %s" % (a, b, c.add(a, b)))

# look up every id; "0" and "7" were never added
for x in "01234567":
    print("%s %s" % (x, c.get(x)))

print(c.getNumNodes())

print(c.getComponents())

# --- same links, components over integer node ids ---
c = Components.IComponents()

for a, b in links:
    print("%s %s %s" % (a, b, c.add(int(a), int(b))))

# look up every id; 0 and 7 were never added
for x in range(0, 8):
    print("%s %s" % (x, c.get(x)))

print(c.getNumNodes())

print(c.getComponents())

# --- an empty container ---
c = Components.IComponents()
print(c.getComponents())