Пример #1
0
def DoRec():
	"""Renumber the gene_index of the current gff record and write it to stdout.

	Records whose "gene_index" lookup yields -1 (the default passed to
	gff.GetIntAttr, presumably meaning the attribute is absent) are written
	through without modification.

	For all other records the pair (gff.Label, gene_index) is mapped to a new
	index stored in the global dict GeneIndexes.  The first time a pair is
	seen it is assigned the current size of GeneIndexes, so the same pair
	always receives the same new index on later records.
	"""
	global GeneIndexes

	AttrDict = gff.GetAttrDict()
	CurrGeneIndex = gff.GetIntAttr("gene_index", -1)
	if CurrGeneIndex == -1:
		gff.WriteRec(sys.stdout)
		return

	Key = gff.Label + "." + str(CurrGeneIndex)
	# Test membership on the dict itself: "Key in GeneIndexes.keys()" builds
	# a list on Python 2 and scans it linearly for every record.
	if Key not in GeneIndexes:
		GeneIndexes[Key] = len(GeneIndexes)
	NewGeneIndex = GeneIndexes[Key]

	AttrDict["gene_index"] = NewGeneIndex
	gff.SetAttrsFromDict(AttrDict)

	gff.WriteRec(sys.stdout)
Пример #2
0
def DoRec():
	"""Emit an intron record for the gap between consecutive exons of a gene.

	Only records whose Feature is "exon" are processed; everything else is
	ignored.  Each exon is counted in ExonCounts under the key
	"<Label>%%%<gene_index>".  When the exon has the same gene_index and
	Label as the previously seen exon, an "intron" record spanning
	LastExonEnd + 1 .. Start - 1 is written to stdout and counted in
	IntronCounts under the same key.

	The function overwrites fields of the shared gff record (Label, Start,
	End, Source, Feature, Frame, Strand, Attrs) in order to write the intron.

	NOTE(review): the intron coordinates are only meaningful if the exons of
	a gene arrive in ascending, non-overlapping order -- confirm with the
	caller / input files.
	"""
	global LastGeneIndex
	global LastExonStart
	global LastLabel
	global LastExonEnd
	global IntronCounts
	global ExonCounts

	if gff.Feature != "exon":
		return

	GeneIndex = gff.GetRequiredIntAttr("gene_index")
	Key = gff.Label + "%%%" + str(GeneIndex)
	ExonCounts[Key] = ExonCounts.get(Key, 0) + 1

	# Save the exon's own coordinates before the gff record is overwritten
	# with the intron below.
	Start = gff.Start
	End = gff.End
	Label = gff.Label

	# LastExonStart == -1 is treated as "no previous exon seen".
	if GeneIndex == LastGeneIndex and Label == LastLabel and LastExonStart != -1:
		gff.Label = LastLabel
		gff.Start = LastExonEnd + 1
		gff.End = Start - 1
		gff.Source = "exons2introns"
		gff.Feature = "intron"
		gff.Frame = "."
		gff.Strand = "."
		gff.Attrs = "gene_index %u; exons %u-%u,%u-%u;" % (GeneIndex, LastExonStart, LastExonEnd, Start, End)
		gff.WriteRec(sys.stdout)
		IntronCounts[Key] = IntronCounts.get(Key, 0) + 1

	LastGeneIndex = GeneIndex
	LastExonStart = Start
	LastExonEnd = End
	LastLabel = Label
Пример #3
0
def DoRec():
    """Merge contiguous exon-type features of a gene into single exon records.

    Records for which IsExonFeature(gff.Feature) is false are ignored.  A run
    of records is considered contiguous while gene_index and Label stay the
    same and each record starts exactly at the previous record's End + 1.
    When a record breaks the run, the finished run is written to stdout as
    one "exon" record spanning ExonStart .. LastEnd, with a "ces" attribute
    listing the merged pieces as "Feature:Start-End" separated by commas.

    The function overwrites fields of the shared gff record in order to
    write the merged exon.  The run that is still open after the last input
    record is NOT written here; the caller has to flush it.
    """
    global LastGeneIndex
    global ExonStart
    global LastLabel
    global LastEnd
    global Attr

    if not IsExonFeature(gff.Feature):
        return

    GeneIndex = gff.GetRequiredIntAttr("gene_index")
    # Save the current record's fields before gff is reused for output.
    Start = gff.Start
    End = gff.End
    Feature = gff.Feature
    Label = gff.Label

    if GeneIndex != LastGeneIndex or Label != LastLabel or Start != LastEnd + 1:
        # ExonStart == -1 is treated as "no run has been opened yet".
        if ExonStart != -1:
            gff.Label = LastLabel
            gff.Start = ExonStart
            gff.End = LastEnd
            gff.Source = "cdsutr2exons"
            gff.Feature = "exon"
            gff.Frame = "."
            gff.Strand = "."
            gff.Attrs = "gene_index %u; ces %s;" % (LastGeneIndex, Attr)
            gff.WriteRec(sys.stdout)
            Attr = ""

        # The current record opens a new run.
        ExonStart = Start

    LastGeneIndex = GeneIndex
    LastLabel = Label
    LastEnd = End

    # Append this piece to the description of the open run.
    s = "%s:%u-%u" % (Feature, Start, End)
    if Attr == "":
        Attr = s
    else:
        Attr += "," + s
Пример #4
0
def DoRec():
    """Write the current gff record to standard output without modifying it."""
    out = sys.stdout
    gff.WriteRec(out)
Пример #5
0
        GeneHis[GeneIndex] = gff.End
        GeneStrands[GeneIndex] = gff.Strand


# Per-gene state filled in by DoRec while the file is read; DoRec stores the
# end coordinate and strand of each gene in GeneHis / GeneStrands.
# Hi is reported below as the maximum annotation end (presumably updated by
# DoRec -- its full body is not shown here); -1 means "nothing seen".
Hi = -1
GeneLos = {}
GeneHis = {}
GeneStrands = {}

Label = ""
gff.GetRecs(FileName, DoRec)

# Fields shared by every gene record written below.
gff.Source = "gene_lengths"
gff.Feature = "gene"
gff.Score = 0
gff.Frame = "."

# Write one "gene" record per collected gene and total up their lengths.
TotGeneLength = 0
for GeneIndex in GeneLos:
    gff.Start = GeneLos[GeneIndex]
    gff.End = GeneHis[GeneIndex]
    gff.Strand = GeneStrands[GeneIndex]
    gff.Attrs = "gene_index %u;" % GeneIndex
    gff.WriteRec(sys.stdout)
    TotGeneLength += gff.End - gff.Start + 1

# Guard the percentage: Hi stays at -1 when no record updated it, and a
# value of 0 would raise ZeroDivisionError.
if Hi > 0:
    Pct = float(TotGeneLength) * 100 / Hi
else:
    Pct = 0.0

# sys.stderr.write produces the same output as the Python 2 "print >>"
# statement and also works on Python 3.
sys.stderr.write("Max annot end     %10u\n" % Hi)
sys.stderr.write("Total gene length %10u\n" % TotGeneLength)
sys.stderr.write("Pct               %10.1f%%\n" % Pct)