示例#1
0
    def Commit(self):
        """Download the selected experiments and send them as one table.

        Connects to the database first when ``self.dbc`` is not set, collects
        the ids of all items selected in ``self.experimentsWidget``, fetches
        the data through ``self.dbc.get_single_data`` and sends the resulting
        table on the "Example table" output, after attaching the "taxid" and
        "genesinrows" data hints to it.
        """
        if not self.dbc:
            self.Connect()

        pb = OWGUI.ProgressBar(self, iterations=1000)
        try:
            # item.text(5) is a comma-separated string; every piece is an id.
            ids = []
            for item in self.experimentsWidget.selectedItems():
                ids.extend(str(item.text(5)).split(","))

            table = self.dbc.get_single_data(
                ids=ids,
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant)
        finally:
            # Close the progress bar even when the download fails, so the
            # widget is not left showing an unfinished progress indicator.
            pb.finish()

        from Orange.orng.orngDataCaching import data_hints
        data_hints.set_hint(table, "taxid", "352472", 10.0)
        data_hints.set_hint(table, "genesinrows", False, 10.0)

        self.send("Example table", table)
示例#2
0
    def chipdata(self, data):
        """Accept new input tables and start the distance computation.

        ``data`` is iterated as ``(name, tables)`` pairs.  Every table gets
        its ``dirname`` and ``strain`` attributes set to ``name`` and is
        collected in ``self.data``.  The distance matrix is computed only if
        more than one table was received and all tables share the same
        attribute names and the same number of examples; otherwise
        ``self.data`` is reset to an empty list and an error is shown in
        ``self.infob``.

        Attribute sets are compared by their sorted names; the tables' own
        attribute lists are left in their original order.
        """
        self.data = []
        if not data:
            self.infoa.setText('No data on input.')
            return

        self.infob.setText("")
        numFiles = sum(len(entry[1]) for entry in data)
        lenSD = len(data)
        self.infoa.setText("%d set%s, total of %d data file%s." %
                           (lenSD, "s" if lenSD != 1 else "",
                            numFiles, "s" if numFiles != 1 else ""))

        # Flatten the input into one list of tables, tagging each with the
        # name of the set it came from.
        for name, etList in data:
            for et in etList:
                et.dirname = name
                et.strain = name
                self.data.append(et)

        if len(self.data) <= 1:
            self.data = []
            self.infob.setText(
                'Error: not enough data, aborting distance computation.')
            return

        # All tables must agree with the first one on attribute names and
        # on the number of examples.
        reference = self.data[0]
        refNames = sorted(attr.name for attr in reference.domain.attributes)
        numEx = len(reference)
        for et in self.data[1:]:
            names = sorted(attr.name for attr in et.domain.attributes)
            if names != refNames:
                self.data = []
                self.infob.setText(
                    "Error: data files contain different attributes, aborting distance computation."
                )
                return
            if len(et) != numEx:
                self.data = []
                self.infob.setText(
                    "Error: data files contain unequal number of examples, aborting distance computation."
                )
                return

        # compute distances
        pb = OWGUI.ProgressBar(self, iterations=len(self.data))
        try:
            self.computeMatrix()
        finally:
            # Close the progress bar even if the computation fails.
            pb.finish()
示例#3
0
    def update_distances(self, base_indices=()):
        """Recompute the experiment distances.

        For every group in ``self.groups`` the distance between the group's
        base column and each data column is taken from the cache or computed
        (and then stored in the cache).  The resulting per-group lists are
        assigned to ``self.distances``; a group whose base index is ``None``
        gets a ``None`` entry instead of a list.

        :param base_indices: one base column index (or ``None``) per group.
            When left at the default ``()``, the indices are looked up in
            each group using the currently selected base group index.
        """
        distance = self.selected_distance()
        if base_indices == ():
            base_group_index = self.selected_base_group_index()
            base_indices = [ind[base_group_index] for _, ind in self.groups]

        assert len(base_indices) == len(self.groups)

        n_columns = len(self.data.domain.attributes)
        pb = OWGUI.ProgressBar(self, len(self.groups) * n_columns)

        base_distances = []
        try:
            # The return value is not needed here; the call is kept in case
            # it has side effects on the cache for this distance
            # (NOTE(review): confirm and drop if it has none).
            self.get_cached_distances(distance)

            for (_group, _indices), base_index in zip(self.groups,
                                                      base_indices):
                if base_index is None:
                    base_distances.append(None)
                    continue

                # Base column of the group
                base_vec = exp.linearize(self.data, [base_index])
                distances = []
                # Compute the distances between base column
                # and all the rest data columns.
                for i in range(n_columns):
                    if i == base_index:
                        dist = 0.0
                    else:
                        # Query the cache once and reuse the answer.
                        dist = self.get_cached_distance(distance, i,
                                                        base_index)
                        if dist is None:
                            vec_i = exp.linearize(self.data, [i])
                            dist = distance(base_vec, vec_i)
                            self.store_distance(distance, i, base_index,
                                                dist)
                    distances.append(dist)
                    pb.advance()

                base_distances.append(distances)
        finally:
            # Close the progress bar even if a distance computation fails.
            pb.finish()

        self.distances = base_distances
示例#4
0
    def compute(self, res=None, dm=None):
        """Load the selected gene sets and, if needed, run the GSEA analysis.

        The gene set collections chosen in the widget (plus GO and KEGG when
        enabled) are loaded into ``self.geneSets``, the output is cleared and
        ``res`` / ``dm`` are stored on ``self.res`` / ``self.dm``.  When no
        result was passed in and input data is available, a new result is
        computed with ``obiGsea.GSEA`` and stored in ``self.res``.

        :param res: previously computed results, or ``None`` to compute them.
        :param dm: stored unchanged in ``self.dm``.
        """
        collectionNames = [self.geneSel[a] for a in self.gridSel]

        organism = self.organismTaxids[self.organismIndex]

        if self.gsgo:
            collectionNames.append((("GO",), organism))
        if self.gskegg:
            collectionNames.append((("KEGG",), organism))

        self.geneSets = obiGeneSets.collections(*collectionNames)

        self.resultsOut(None)

        qApp.processEvents()
        self.res = res
        self.dm = dm

        clearListView(self.listView)
        self.addComment("Computing...")
        qApp.processEvents()

        self.phenVar = self.phenCands[self.selectedPhenVar][0]
        self.geneVar = self.geneCands[self.selectedGeneVar]

        if self.res is None and self.data:
            self.setSelMode(False)

            pb = OWGUI.ProgressBar(self, iterations=self.perms + 2)
            try:
                if hasattr(self, "btnApply"):
                    self.btnApply.setFocus()

                # kwargs go to GSEA.compute, dkwargs to the GSEA constructor.
                kwargs = {}
                dkwargs = {
                    "phenVar": self.phenVar,
                    "geneVar": self.geneVar,
                }

                if not obiGsea.already_have_correlations(self.data):
                    selectedClasses = self.psel.getSelection()
                    # Both phenotype groups must contain at least one value.
                    if (len(selectedClasses[0]) == 0
                            or len(selectedClasses[1]) == 0):
                        self.addComment("Phenotype group empty. Stopped.")
                        return

                    dkwargs["classValues"] = selectedClasses
                    dkwargs["atLeast"] = self.atLeast

                    permtype = self.permutationTypes[self.ptype][1]
                    kwargs["permutation"] = \
                        "class" if permtype == "p" else "genes"

                # Each limit applies only when its checkbox flag is set;
                # otherwise a value that does not restrict anything is used.
                kwargs["minSize"] = \
                    self.minSubsetSize if self.minSubsetSizeC else 1
                kwargs["maxSize"] = \
                    self.maxSubsetSize if self.maxSubsetSizeC else 1000000
                kwargs["minPart"] = \
                    self.minSubsetPart / 100.0 if self.minSubsetPartC else 0.0

                # create gene matcher; the Dicty matcher is added only for
                # taxonomy id "352472"
                matchers = [obiGene.GMKEGG(organism)]
                if organism == "352472":
                    matchers.append(obiGene.GMDicty())
                genematcher = obiGene.matcher([matchers])

                #dkwargs["caseSensitive"] = self.csgm

                gso = obiGsea.GSEA(self.data, matcher=genematcher, **dkwargs)

                # Add the gene sets one by one so the GUI stays responsive.
                for gs in self.geneSets:
                    gso.addGenesets([gs])
                    qApp.processEvents()

                self.res = gso.compute(n=self.perms, callback=pb.advance,
                                       **kwargs)
            finally:
                # Also reached on the early return above and on errors, so
                # the progress bar is never left unfinished.
                pb.finish()
示例#5
0
    def update_scores(self):
        """Compute the scores and update the histogram.

        Clears the plot, computes the scores for the current target
        selection with the selected scoring method (plus a null distribution
        when ``self.compute_null`` is set), and then updates the histogram
        values, boundaries, optional null curve, boundary spin box
        visibility and target markers.  Returns early, without computing
        anything, when there is no data, no selected label, or when a two
        sample test would have an empty group.
        """
        self.clear_plot()
        self.error(0)
        label, _ = self.current_target_selection
        if not self.data or label is None:
            return

        score_type, score_func, hist_key, two_sample_test = \
            self.score_methods[self.method_index]

        if two_sample_test:
            target = self.targets
            score_target = set(target)
            ind1, ind2 = score_func(
                self.data, self.genes_in_columns).test_indices(score_target)
            if not len(ind1) or not len(ind2):
                self.error(
                    0,
                    "Target labels must exclude/include at least one value.")
                return
        else:  # ANOVA should use all labels.
            target = dict(self.data_labels)[label]
            if self.genes_in_columns:
                target = [(label, t) for t in target]
            score_target = target

        # TODO: Check that each label has more than one measurement, raise
        # warning otherwise.

        pb = OWGUI.ProgressBar(
            self,
            (4 + self.permutations_count) if self.compute_null else 3)
        try:
            self.scores = dict(
                self.compute_scores(self.data,
                                    score_func,
                                    self.genes_in_columns,
                                    score_target,
                                    advance=pb.advance))
            pb.advance()
            if self.compute_null:
                self.null_dist = self.compute_null_distribution(
                    self.data,
                    score_func,
                    self.genes_in_columns,
                    score_target,
                    self.permutations_count,
                    advance=pb.advance)
            else:
                self.null_dist = []
            pb.advance()

            htype = self.histType[hist_key]
            self.histogram.type = htype
            if self.scores:
                self.histogram.setValues(self.scores.values())
                low, high = self.thresholds.get(
                    score_type, (float("-inf"), float("inf")))
                minx, maxx = self.histogram.minx, self.histogram.maxx
                # Clip the stored thresholds to the range of the scores.
                low, high = max(low, minx), min(high, maxx)

                # One-tailed histograms use a single boundary.
                if htype == "hiTail":
                    low = high
                if htype == "lowTail":
                    high = low

                self.histogram.setBoundary(low, high)

                if self.compute_null and self.null_dist:
                    nullY, nullX = numpy.histogram(self.null_dist,
                                                   bins=self.histogram.xData)
                    # Force true division: the bin counts are integers and
                    # would otherwise be floor-divided under Python 2.
                    nullY = nullY / float(self.permutations_count)
                    self.histogram.nullCurve = self.histogram.addCurve(
                        "nullCurve",
                        Qt.black,
                        Qt.black,
                        6,
                        symbol=QwtSymbol.NoSymbol,
                        style=QwtPlotCurve.Steps,
                        xData=nullX,
                        yData=nullY)

                    # Rescale both axes so the null curve fits, with a 5%
                    # margin on each side.
                    minx = min(min(nullX), minx)
                    maxx = max(max(nullX), maxx)
                    miny = min(min(nullY), self.histogram.miny)
                    maxy = max(max(nullY), self.histogram.maxy)
                    spanx, spany = maxx - minx, maxy - miny
                    self.histogram.setAxisScale(QwtPlot.xBottom,
                                                minx - 0.05 * spanx,
                                                maxx + 0.05 * spanx)
                    self.histogram.setAxisScale(QwtPlot.yLeft,
                                                miny - 0.05 * spany,
                                                maxy + 0.05 * spany)

                state = dict(hiTail=(False, True),
                             lowTail=(True, False),
                             twoTail=(True, True))
                spins = (self.upperBoundarySpin, self.lowerBoundarySpin)
                for spin, visible in zip(spins, state[self.histogram.type]):
                    spin.setVisible(visible)

                # If this is a two sample test add markers to the left and
                # right plot indicating which target group is over-expressed
                # in that part
                if self.method_index in [0, 2, 6]:
                    # fold change (index 0) is centered on 1.0; the other
                    # methods place the markers halfway towards 0
                    center = 1 if self.method_index == 0 else 0
                    x1 = (self.histogram.minx + center) / 2.0
                    x2 = (self.histogram.maxx + center) / 2.0
                    y = self.histogram.maxy

                    if self.genes_in_columns:
                        label = target[0][0]
                        target_values = [t[1] for t in target]
                        values = dict(self.data_labels)[label]
                    else:
                        target_values = target
                        values = self.data_labels[0][1]

                    left = ", ".join(
                        v for v in values if v not in target_values)
                    right = ", ".join(
                        v for v in values if v in target_values)

                    self.histogram.addMarker(left, x1, y)
                    self.histogram.addMarker(right, x2, y)
                self.warning(0)
            else:
                self.warning(0, "No scores obtained.")
            self.histogram.replot()
            pb.advance()
        finally:
            # Close the progress bar even if scoring or plotting fails.
            pb.finish()
        self.update_data_info_label()