Exemplo n.º 1
0
    def CountDistribution(self):
        """print distribution of number of units and sequences per
        family.
        """
        self.PrintStatus()

        print """#
# NUM:          number of units/sequences per family
# NUNITS:       number of families with x units
# NSEQ:         number of families with x sequences
#"""
        print "NUM\tNUNITS\tNSEQ"
        sys.stdout.flush()

        histograms = []

        class_name = self.mTableDomains.GetFieldNameClass()

        statement = "SELECT COUNT(*) FROM %s GROUP BY %s" % (
            self.mTableNameDomains, class_name)
        data = map(lambda x: x[0], self.dbhandle.Execute(statement).fetchall())
        h1 = Histogram.Calculate(data)
        histograms.append(h1)

        statement = "SELECT COUNT(DISTINCT nid) FROM %s GROUP BY %s" % (
            self.mTableNameDomains, class_name)
        data = map(lambda x: x[0], self.dbhandle.Execute(statement).fetchall())
        h2 = Histogram.Calculate(data)
        histograms.append(h2)

        ch = Histogram.Combine(histograms)
        Histogram.Print(ch)
    def xCountDistribution( self ):
        """print distribution of number of units and sequences per
        family.
        """
        self.PrintStatus()
            
        print """#
# NUM:          number of units/sequences per family
# NUNITS:       number of families with x units
# NSEQ:         number of families with x sequences
#"""
        print "num\tnunits\tnseq"
        sys.stdout.flush()

        histograms = []
        
        statement = "SELECT nunits, COUNT(*) FROM %s GROUP BY nunits" % self.mTableNameFamilies
        h1 = self.dbhandle.Execute( statement ).fetchall()        
        histograms.append( h1 )
        
        statement = "SELECT nsequences, COUNT(*) FROM %s GROUP BY nsequences" % self.mTableNameFamilies
        h2 = self.dbhandle.Execute( statement ).fetchall()
        histograms.append( h2 )
        
        ch = Histogram.Combine( histograms )
        Histogram.Print(ch)        
    def DomainDistribution( self ):
        """
        distribution of domains per sequence
        """
        self.PrintStatus()
        if self.mLogLevel >= 1:
            sys.stdout.flush()

        print """#
# COUNTS:       number 
# NDOMAINS:       number of domains per sequence
# NDOMAINS/SIN:   number of domains without singletons per sequence
# NMOBILES:       number of mobile modules per sequence
#"""
        
        print "length\tndomains\tndomains/sin\tnmobiles"
        sys.stdout.flush()

        histograms = []
        
        statement = "SELECT COUNT(*) FROM %s GROUP BY nid" % self.mTableNameDomains
        d1 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())

        histograms.append( Histogram.Calculate( d1 ) )

        statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family AND f.nunits > 1 GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameFamilies)
        d2 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
        
        histograms.append( Histogram.Calculate( d2 ) )

        statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameSubset)
        d3 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
        
        histograms.append( Histogram.Calculate( d3 ) )

        ch = Histogram.Combine( histograms )

        Histogram.Print(ch)        
    def LengthDistribution( self ):
        """print distribution of unit length of families
        """
        self.PrintStatus()
            
        print """#
# LENGTH:       number of units/sequences per family
# NUNITS:       number of domains of that length
# NUNITS/SIN:   number of domains without singletons
# NSIN:         number of singletons with that length
#"""
        print "length\tnunits\tnunits/sin\tnsin"
        sys.stdout.flush()

        histograms = []
        
        statement = "SELECT CEILING((end-start+1)/10) * 10 AS olength, COUNT(*) FROM %s GROUP BY olength" % self.mTableNameDomains
        h1 = self.dbhandle.Execute( statement ).fetchall()        
        histograms.append( h1 )
        
        statement = "SELECT CEILING((end-start+1)/10) * 10 AS dlength, COUNT(*) FROM %s AS a, %s AS d " %\
                    (self.mTableNameDomains, self.mTableNameFamilies) +\
                    " WHERE d.family = a.family AND d.nunits > 1 GROUP BY dlength"
        
        h2 = self.dbhandle.Execute( statement ).fetchall()
        histograms.append( h2 )
        
        statement = "SELECT CEILING((end-start+1)/10) * 10 AS alength, COUNT(*) FROM %s AS a, %s AS d " %\
                    (self.mTableNameDomains, self.mTableNameFamilies) +\
                    " WHERE d.family = a.family AND d.nunits = 1 GROUP BY alength"
        
        h3 = self.dbhandle.Execute( statement ).fetchall()
        histograms.append( h3 )

        ch = Histogram.Combine( histograms )

        Histogram.Print(ch)        
Exemplo n.º 5
0
            continue

        header.append(filename)
        infile = open(filename, "r")

        h = []
        while 1:

            line = infile.readline()
            if not line: break

            if line[0] == "#": continue

            if not re.match("(\d+)", line): continue

            data = map(string.atof, re.split("\s+", line[:-1]))

            h.append((data[0], tuple(data[1:])))

        infile.close()

        histograms.append(h)

    print "# bin\t" + string.join(header, "\t\t")
    ch = Histogram.Combine(histograms)
    Histogram.Print(ch)

    ch = Histogram.Normalize(ch)
    print "# bin\t" + string.join(header, "\t\t")
    Histogram.Print(ch)
Exemplo n.º 6
0
            if options.normalize: h = Histogram.Normalize(h)
            if options.cumulative: h = Histogram.Cumulate(h)
            if options.reverse_cumulative:
                h = Histogram.Cumulate(h, direction=0)

            hists.append(h)

            for m in options.append:
                if m == "normalize":
                    hists.append(Histogram.Normalize(h))

            if options.headers:
                titles.append(options.headers[x])
            elif options.titles:
                titles.append(options.titles[x])
            else:
                titles.append("col%i" % options.columns[x])

        if titles:
            options.stdout.write("bin\t" + "\t".join(titles) + "\n")

        if len(hists) == 1:
            Histogram.Print(hists[0], nonull=options.nonull)
        else:
            combined_histogram = Histogram.Combine(
                hists, missing_value=options.missing_value)
            Histogram.Print(combined_histogram, nonull=options.nonull)

    Experiment.Stop()