Exemplo n.º 1
0
    def NumAssignments(self):
        """Print the assignment counts per row and two histograms of them.

        When ``self.mLogLevel`` is at least 1, a commented header (class of
        this instance, current local time, ``self.mTableNameDomains``) and the
        column titles ``nid``, ``nassignments``, ``nclasses`` are printed
        first.

        Every row returned by ``self.mTableDomains.GetNumAssignments()`` is
        then written to stdout as one tab-separated line, followed by a
        histogram over the second field of the rows and a histogram over
        the third field.
        """

        if self.mLogLevel >= 1:
            timestamp = time.asctime(time.localtime(time.time()))
            print("# instance of <%s> on %s" % (str(self.__class__),
                                                timestamp))
            print("# source: %s" % (self.mTableNameDomains))
            print("# hubs between domains")
            print("\t".join(("nid", "nassignments", "nclasses")))
            sys.stdout.flush()

        result = self.mTableDomains.GetNumAssignments()

        for row in result:
            print("\t".join(map(str, row)))

        # Field 1 of each row feeds the first histogram.
        counts = [row[1] for row in result]
        print("# histogram of number of domains per sequence")
        Histogram.Print(Histogram.Calculate(counts))

        # Field 2 of each row feeds the second histogram.
        distinct_counts = [row[2] for row in result]
        print("# histogram of number of different domains per sequence")
        Histogram.Print(Histogram.Calculate(distinct_counts))
Exemplo n.º 2
0
    def CountDistribution(self):
        """print distribution of number of units and sequences per
        family.
        """
        self.PrintStatus()

        print """#
# NUM:          number of units/sequences per family
# NUNITS:       number of families with x units
# NSEQ:         number of families with x sequences
#"""
        print "NUM\tNUNITS\tNSEQ"
        sys.stdout.flush()

        histograms = []

        class_name = self.mTableDomains.GetFieldNameClass()

        statement = "SELECT COUNT(*) FROM %s GROUP BY %s" % (
            self.mTableNameDomains, class_name)
        data = map(lambda x: x[0], self.dbhandle.Execute(statement).fetchall())
        h1 = Histogram.Calculate(data)
        histograms.append(h1)

        statement = "SELECT COUNT(DISTINCT nid) FROM %s GROUP BY %s" % (
            self.mTableNameDomains, class_name)
        data = map(lambda x: x[0], self.dbhandle.Execute(statement).fetchall())
        h2 = Histogram.Calculate(data)
        histograms.append(h2)

        ch = Histogram.Combine(histograms)
        Histogram.Print(ch)
Exemplo n.º 3
0
def printHistogram(values, section, options, min_value=0, increment=1.0):
    """Write a histogram of *values* to the output file for *section*.

    The file name is ``options.output_filename_pattern % section``; the file
    is opened for writing.  The first line written is the header
    ``bin<TAB><section>``; each following line holds one ``(bin, value)``
    pair obtained by iterating over the histogram, formatted as
    ``%5.2f<TAB>%i``.

    Arguments
    ---------
    values
        Data passed to ``Histogram.Calculate``.
    section
        Substituted into the file name pattern and used as column title.
    options
        Object providing ``output_filename_pattern``.
    min_value
        Forwarded to ``Histogram.Calculate`` as ``min_value``.
    increment
        Forwarded to ``Histogram.Calculate`` as ``increment``.
    """
    # The context manager guarantees the file is closed even when the
    # histogram calculation or a write raises.
    with open(options.output_filename_pattern % section, "w") as outfile:
        # Forward the caller's settings instead of hard-coded 0 / 1.0; the
        # defaults keep the previous behaviour for existing callers.
        h = Histogram.Calculate(values,
                                no_empty_bins=True,
                                min_value=min_value,
                                increment=increment)

        outfile.write("bin\t%s\n" % section)
        for bin_value, count in h:
            outfile.write("%5.2f\t%i\n" % (bin_value, count))
    def DomainDistribution( self ):
        """
        distribution of domains per sequence
        """
        self.PrintStatus()
        if self.mLogLevel >= 1:
            sys.stdout.flush()

        print """#
# COUNTS:       number 
# NDOMAINS:       number of domains per sequence
# NDOMAINS/SIN:   number of domains without singletons per sequence
# NMOBILES:       number of mobile modules per sequence
#"""
        
        print "length\tndomains\tndomains/sin\tnmobiles"
        sys.stdout.flush()

        histograms = []
        
        statement = "SELECT COUNT(*) FROM %s GROUP BY nid" % self.mTableNameDomains
        d1 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())

        histograms.append( Histogram.Calculate( d1 ) )

        statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family AND f.nunits > 1 GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameFamilies)
        d2 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
        
        histograms.append( Histogram.Calculate( d2 ) )

        statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameSubset)
        d3 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
        
        histograms.append( Histogram.Calculate( d3 ) )

        ch = Histogram.Combine( histograms )

        Histogram.Print(ch)        
Exemplo n.º 5
0
    vals = []

    # Read all of stdin and keep only the lines that do not start with "#".
    lines = [x for x in sys.stdin.readlines() if not x.startswith("#")]

    for raw_line in lines:

        # Remove only the line terminator.  Slicing off the last character
        # unconditionally would eat a real character (possibly a digit of
        # the value) when the final line has no trailing newline.
        line = raw_line.rstrip("\n")
        data = line.split("\t")
        try:
            val = float(data[param_column])
        except IndexError:
            # The line has fewer columns than param_column requires.
            print("# IndexError in line: %s" % line)
            continue

        # Clamp the value into [param_lower_limit, param_upper_limit];
        # a limit of None disables that side of the clamp.
        if param_upper_limit is not None and val > param_upper_limit:
            val = param_upper_limit

        if param_lower_limit is not None and val < param_lower_limit:
            val = param_lower_limit

        vals.append(val)

    # Drop the reference to the input lines; only vals is needed from here.
    lines = None

    h = Histogram.Calculate(vals,
                            no_empty_bins=param_empty_bins,
                            increment=param_bin_size)
    print("# num_values=%i" % len(vals))
    Histogram.Print(h, nonull=param_nonull)
Exemplo n.º 6
0
                options.stdlog.write("# no data\n")
            Experiment.Stop()
            sys.exit(0)

        for x in range(len(options.columns)):

            if options.loglevel >= 1:
                options.stdlog.write("# column=%i, num_values=%i\n" %
                                     (options.columns[x], len(vals[x])))

            if len(vals[x]) < options.min_data: continue

            h = Histogram.Calculate(
                vals[x],
                no_empty_bins=options.no_empty_bins,
                increment=options.bin_size,
                min_value=options.min_value,
                max_value=options.max_value,
                dynamic_bins=options.dynamic_bins,
                ignore_out_of_range=options.ignore_out_of_range)

            if options.normalize: h = Histogram.Normalize(h)
            if options.cumulative: h = Histogram.Cumulate(h)
            if options.reverse_cumulative:
                h = Histogram.Cumulate(h, direction=0)

            hists.append(h)

            for m in options.append:
                if m == "normalize":
                    hists.append(Histogram.Normalize(h))