Example #1
 def commit(self):
     map_data = None
     if self.data and self.xpoints is not None and self.ypoints is not None \
             and self.xpoints * self.ypoints == len(self.data):
         used_names = [
             var.name
             for var in self.data.domain.variables + self.data.domain.metas
         ]
         xmeta = Orange.data.ContinuousVariable.make(
             get_unique_names(used_names, "X"))
         ymeta = Orange.data.ContinuousVariable.make(
             get_unique_names(used_names, "Y"))
         # add new variables for the X and Y dimensions to the data domain
         metas = self.data.domain.metas + (xmeta, ymeta)
         domain = Orange.data.Domain(self.data.domain.attributes,
                                     self.data.domain.class_vars, metas)
         map_data = self.data.transform(domain)
         map_data[:,
                  xmeta] = np.tile(np.arange(self.xpoints),
                                   len(self.data) // self.xpoints).reshape(
                                       -1, 1)
         map_data[:, ymeta] = np.repeat(np.arange(self.ypoints),
                                        len(self.data) //
                                        self.ypoints).reshape(-1, 1)
     self.Outputs.map.send(map_data)
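Most of these examples share one recipe: collect the names already taken in the domain, let get_unique_names pick a collision-free name, wrap it in a new variable, extend the domain, and fill the new column. The sketch below condenses that recipe; it is ours rather than taken from any project above: the helper name append_meta_column is invented, the import path assumes Orange 3's Orange.data.util, and the unlocked() context is only needed on Orange versions that lock table arrays (compare Examples #1, #7 and #9).

import numpy as np
import Orange.data
from Orange.data.util import get_unique_names

def append_meta_column(data, proposed_name, values):
    # names already taken by attributes, class variables and metas
    used = [var.name for var in data.domain.variables + data.domain.metas]
    new_var = Orange.data.ContinuousVariable(
        get_unique_names(used, proposed_name))
    # same attributes and classes, one extra meta variable
    domain = Orange.data.Domain(data.domain.attributes,
                                data.domain.class_vars,
                                data.domain.metas + (new_var,))
    out = data.transform(domain)
    # newer Orange locks the arrays; older code assigns directly (Example #1)
    with out.unlocked(out.metas):
        out[:, new_var] = np.asarray(values, dtype=float).reshape(-1, 1)
    return out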
Example #2
    def send_data(self):
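        # Attach DBSCAN cluster labels and core-sample flags as meta columns
        # whose names cannot clash with existing variables.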
        model = self.model

        clusters = [c if c >= 0 else np.nan for c in model.labels]
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters).reshape(len(self.data), 1)
        core_samples = set(model.projector.core_sample_indices_)
        in_core = np.array([1 if (i in core_samples) else 0
                            for i in range(len(self.data))])
        in_core = in_core.reshape(len(self.data), 1)

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        names = [var.name for var in chain(attributes, classes, meta_attrs) if var]

        u_clust_var = get_unique_names(names, "Cluster")
        clust_var = DiscreteVariable(
            u_clust_var, values=["C%d" % (x + 1) for x in range(k)])

        u_in_core = get_unique_names(names + [u_clust_var], "DBSCAN Core")
        in_core_var = DiscreteVariable(u_in_core, values=("0", "1"))

        x, y, metas = self.data.X, self.data.Y, self.data.metas

        meta_attrs += (clust_var, )
        metas = np.hstack((metas, clusters))
        meta_attrs += (in_core_var, )
        metas = np.hstack((metas, in_core))

        domain = Domain(attributes, classes, meta_attrs)
        new_table = Table(domain, x, y, metas, self.data.W)

        self._set_output_summary(new_table)
        self.Outputs.annotated_data.send(new_table)
Example #3
    def send_data(self):
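        # Same annotation as Example #2, but Table.add_column(..., to_metas=True)
        # builds the extended domain internally.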
        model = self.model

        clusters = [c if c >= 0 else np.nan for c in model.labels]
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters)
        core_samples = set(model.projector.core_sample_indices_)
        in_core = np.array([1 if (i in core_samples) else 0
                            for i in range(len(self.data))])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        names = [var.name for var in chain(attributes, classes, meta_attrs) if var]

        u_clust_var = get_unique_names(names, "Cluster")
        clust_var = DiscreteVariable(
            u_clust_var, values=["C%d" % (x + 1) for x in range(k)])

        u_in_core = get_unique_names(names + [u_clust_var], "DBSCAN Core")
        in_core_var = DiscreteVariable(u_in_core, values=("0", "1"))

        new_table = self.data.add_column(clust_var, clusters, to_metas=True)
        new_table = new_table.add_column(in_core_var, in_core, to_metas=True)

        self.Outputs.annotated_data.send(new_table)
Example #4
 def __call__(self, data):
     # create the function that transforms the data
     common = _EMSC(self.reference, self.badspectra, self.weights, self.order,
                    self.scaling, data.domain)
     # the transformation above handles the domain column-wise
     atts = [a.copy(compute_value=EMSCFeature(i, common))
             for i, a in enumerate(data.domain.attributes)]
     model_metas = []
     n_badspec = len(self.badspectra) if self.badspectra is not None else 0
     used_names = set([var.name for var in data.domain.variables + data.domain.metas])
     if self.output_model:
         i = len(data.domain.attributes)
         for o in range(self.order+1):
             n = get_unique_names(used_names, "EMSC parameter " + str(o))
             model_metas.append(
                 Orange.data.ContinuousVariable(name=n,
                                                compute_value=EMSCModel(i, common)))
             i += 1
         for o in range(n_badspec):
             n = get_unique_names(used_names, "EMSC parameter bad spec " + str(o))
             model_metas.append(
                 Orange.data.ContinuousVariable(name=n,
                                                compute_value=EMSCModel(i, common)))
             i += 1
         n = get_unique_names(used_names, "EMSC scaling parameter")
         model_metas.append(
             Orange.data.ContinuousVariable(name=n,
                                            compute_value=EMSCModel(i, common)))
     domain = Orange.data.Domain(atts, data.domain.class_vars,
                                 data.domain.metas + tuple(model_metas))
     return data.from_table(domain, data)
Example #5
 def __call__(self, data):
     # create the function that transforms the data
     common = _ME_EMSC(reference=self.reference,
                       weights=self.weights,
                       ncomp=self.ncomp,
                       alpha0=self.alpha0,
                       gamma=self.gamma,
                       maxNiter=self.maxNiter,
                       fixedNiter=self.fixedNiter,
                       positiveRef=self.positiveRef,
                       domain=data.domain)
     # the transformation above handles the domain column-wise
     atts = [
         a.copy(compute_value=ME_EMSCFeature(i, common))
         for i, a in enumerate(data.domain.attributes)
     ]
     model_metas = []
     n_badspec = self.ncomp
     # Check if function knows about bad spectra
     used_names = set(
         [var.name for var in data.domain.variables + data.domain.metas])
     if self.output_model:
         i = len(data.domain.attributes)
         for o in range(1):
             n = get_unique_names(used_names, "EMSC parameter " + str(o))
             model_metas.append(
                 Orange.data.ContinuousVariable(name=n,
                                                compute_value=ME_EMSCModel(
                                                    i, common)))
             i += 1
         for o in range(n_badspec):
             n = get_unique_names(used_names,
                                  "EMSC parameter bad spec " + str(o))
             model_metas.append(
                 Orange.data.ContinuousVariable(name=n,
                                                compute_value=ME_EMSCModel(
                                                    i, common)))
             i += 1
         n = get_unique_names(used_names, "EMSC scaling parameter")
         model_metas.append(
             Orange.data.ContinuousVariable(name=n,
                                            compute_value=ME_EMSCModel(
                                                i, common)))
         i += 1
         n = get_unique_names(used_names, "Number of iterations")
         model_metas.append(
             Orange.data.ContinuousVariable(name=n,
                                            compute_value=ME_EMSCModel(
                                                i, common)))
         i += 1
         n = get_unique_names(used_names, "RMSE")
         model_metas.append(
             Orange.data.ContinuousVariable(name=n,
                                            compute_value=ME_EMSCModel(
                                                i, common)))
     domain = Orange.data.Domain(atts, data.domain.class_vars,
                                 data.domain.metas + tuple(model_metas))
     return data.from_table(domain, data)
Example #6
    def commit(self):
        transformed = data = components = None
        if self._pca is not None:
            if self._transformed is None:
                # Compute the full transform (MAX_COMPONENTS components) once.
                self._transformed = self._pca(self.data)
            transformed = self._transformed

            if self._variance_ratio is not None:
                for var, explvar in zip(
                        transformed.domain.attributes,
                        self._variance_ratio[:self.ncomponents]):
                    var.attributes["variance"] = round(explvar, 6)
            domain = Domain(transformed.domain.attributes[:self.ncomponents],
                            self.data.domain.class_vars,
                            self.data.domain.metas)
            transformed = transformed.from_table(domain, transformed)

            # prevent caching new features by defining compute_value
            proposed = [a.name for a in self._pca.orig_domain.attributes]
            meta_name = get_unique_names(proposed, 'components')
            meta_vars = [StringVariable(name=meta_name)]
            metas = numpy.array(
                [['PC{}'.format(i + 1) for i in range(self.ncomponents)]],
                dtype=object).T
            if self._variance_ratio is not None:
                variance_name = get_unique_names(proposed, "variance")
                meta_vars.append(ContinuousVariable(variance_name))
                metas = numpy.hstack(
                    (metas, self._variance_ratio[:self.ncomponents, None]))

            dom = Domain([
                ContinuousVariable(name, compute_value=lambda _: None)
                for name in proposed
            ],
                         metas=meta_vars)
            components = Table(dom,
                               self._pca.components_[:self.ncomponents],
                               metas=metas)
            components.name = 'components'

            data_dom = Domain(self.data.domain.attributes,
                              self.data.domain.class_vars,
                              self.data.domain.metas + domain.attributes)
            data = Table.from_numpy(data_dom,
                                    self.data.X,
                                    self.data.Y,
                                    numpy.hstack(
                                        (self.data.metas, transformed.X)),
                                    ids=self.data.ids)

        self._pca_projector.component = self.ncomponents
        self.Outputs.transformed_data.send(transformed)
        self.Outputs.components.send(components)
        self.Outputs.data.send(data)
        self.Outputs.pca.send(self._pca_projector)
Example #7
def concatenate_data(tables, filenames, label):
    if not tables:
        return None

    orig_tables = tables

    # prepare xs from the spectral-specific tables for joining into a common domain
    spectral_specific_domains = []
    xss = [
        t.special_spectral_data[0] for t in tables
        if hasattr(t, "special_spectral_data")
    ]
    xs = reduce(numpy_union_keep_order, xss, np.array([]))
    if len(xs):
        attrs = [ContinuousVariable("%f" % f) for f in xs]
        spectral_specific_domains = [Domain(attrs, None, None)]

    domain = _merge_domains(spectral_specific_domains +
                            [table.domain for table in tables])
    name = get_unique_names(domain, "Filename")
    source_var = StringVariable(name)
    name = get_unique_names(domain, "Label")
    label_var = StringVariable(name)
    domain = add_columns(domain, metas=(source_var, label_var))

    # concatenate tables
    tables = [table.transform(domain) for table in tables]
    data = type(tables[0]).concatenate(tables)

    with data.unlocked():
        # fill in spectral data
        xs_sind = np.argsort(xs)
        xs_sorted = xs[xs_sind]
        pos = 0
        for table in orig_tables:
            if hasattr(table, "special_spectral_data"):
                special = table.special_spectral_data
                indices = xs_sind[np.searchsorted(xs_sorted, special[0])]
                data.X[pos:pos + len(table), indices] = special[1]
            pos += len(table)

        data[:, source_var] = np.array(
            list(
                chain(*(repeat(fn, len(table))
                        for fn, table in zip(filenames, tables))))).reshape(
                            -1, 1)
        data[:, label_var] = np.array(
            list(
                chain(*(repeat(label, len(table))
                        for _, table in zip(filenames, tables))))).reshape(
                            -1, 1)

    return data
Example #8
    def test_get_unique_names_duplicated_proposals(self):
        names = ["foo", "bar", "baz", "baz (3)"]

        self.assertEqual(get_unique_names(names, ["foo", "boo", "boo"]),
                         ['foo (1)', 'boo (1)', 'boo (2)'])
        self.assertEqual(get_unique_names(names, ["foo", "boo", "boo", "baz"]),
                         ['foo (4)', 'boo (4)', 'boo (5)', 'baz (4)'])
        self.assertEqual(get_unique_names([], ["foo", "boo", "boo", "baz"]),
                         ['foo', 'boo (1)', 'boo (2)', 'baz'])
        self.assertEqual(
            get_unique_names(["foo", "bong"], ["foo", "boo", "boo", "baz"]),
            ['foo (1)', 'boo (1)', 'boo (2)', 'baz'])

        self.assertEqual(
            get_unique_names(names, ["foo", "boo", "boo"],
                             equal_numbers=False),
            ['foo (1)', 'boo (1)', 'boo (2)'])
        self.assertEqual(
            get_unique_names(names, ["foo", "boo", "boo", "baz"],
                             equal_numbers=False),
            ['foo (1)', 'boo (1)', 'boo (2)', 'baz (4)'])
        self.assertEqual(
            get_unique_names([], ["foo", "boo", "boo", "baz"],
                             equal_numbers=False),
            ['foo', 'boo (1)', 'boo (2)', 'baz'])
        self.assertEqual(
            get_unique_names(["foo", "bong"], ["foo", "boo", "boo", "baz"],
                             equal_numbers=False),
            ['foo (1)', 'boo (1)', 'boo (2)', 'baz'])
Example #9
    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas
        attrs = self.data.domain.attributes
        names = [var.name for var in chain(metas, [class_var], attrs)]

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            proposed = "{}({})".format(class_var.name, learner_name)
            name = get_unique_names(names, proposed)
            var = Orange.data.DiscreteVariable(name, class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            names = [f"p({value})" for value in class_var.values]
            names = get_unique_names(self.data.domain, names)
            metas += tuple(map(Orange.data.ContinuousVariable, names))

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if extra:
            with data.unlocked(data.metas):
                data.metas[:, len(self.data.domain.metas):] = \
                    np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data
Example #10
    def _commit_predictions(self):
        if not self.data:
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(slot, newmetas, newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [var.name for var in chain(attrs, self.data.domain.class_vars, metas) if var]
        uniq_newmetas = []
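        # Rename any predictor-generated meta that clashes with an existing
        # name, and grow `names` so later metas also avoid it.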
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)
Example #11
    def commit(self):
        self.infolabel.setText(' ')

        kwargs = {'iterations': self.iterations}
        if self.method == 0:
            alg = cd.label_propagation

        elif self.method == 1:
            alg = cd.label_propagation_hop_attenuation
            kwargs['delta'] = self.hop_attenuation

        if self.net is None:
            self.Outputs.items.send(None)
            self.Outputs.network.send(None)
            self.cluster_feature = None
            return

        if self.use_random_state:
            kwargs['seed'] = 0

        labels = alg(self.net, **kwargs)
        domain = self.net.nodes.domain
        # Tie a name for presenting clustering results to the widget instance
        if self.cluster_feature is None:
            self.cluster_feature = get_unique_names(domain, 'Cluster')
        cd.add_results_to_items(self.net, labels, self.cluster_feature)

        self.infolabel.setText('%d clusters found' % len(set(labels.values())))
        self.Outputs.items.send(self.net.nodes)
        self.Outputs.network.send(self.net)
Example #12
    def commit(self):
        kwargs = {'iterations': self.iterations}
        if self.attenuate:
            alg = cd.label_propagation_hop_attenuation
            kwargs['delta'] = self.hop_attenuation
        else:
            alg = cd.label_propagation

        if self.net is None:
            self.Outputs.items.send(None)
            self.Outputs.network.send(None)
            self.cluster_feature = None
            self.info.set_output_summary(self.info.NoOutput)
            return

        if self.use_random_state:
            kwargs['seed'] = 0

        labels = alg(self.net, **kwargs)
        domain = self.net.nodes.domain
        # Tie a name for presenting clustering results to the widget instance
        if self.cluster_feature is None:
            self.cluster_feature = get_unique_names(domain, 'Cluster')
        cd.add_results_to_items(self.net, labels, self.cluster_feature)

        self.Outputs.items.send(self.net.nodes)
        self.Outputs.network.send(self.net)

        nclusters = len(set(labels.values()))
        self.info.set_output_summary(nclusters, f"{nclusters} clusters")
Example #13
    def _add_metadata(self, corpus: Corpus) -> Corpus:
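        # Join the external metadata to the corpus by file path (or by the
        # "utterance" column for conllu input) and append each metadata column
        # as a meta variable with a unique name.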
        if (corpus is None or "path" not in corpus.domain
                or self._meta_data is None
                or (self.META_DATA_FILE_KEY not in self._meta_data.columns
                    and self.CONLLU_META_DATA not in self._meta_data.columns)):
            return corpus

        if self.is_conllu:
            df = self._meta_data.set_index(self.CONLLU_META_DATA)
            path_column = corpus.get_column_view("utterance")[0]
        else:
            df = self._meta_data.set_index(
                self.startdir + self._meta_data[self.META_DATA_FILE_KEY])
            path_column = corpus.get_column_view("path")[0]

        if len(df.index.drop_duplicates()) != len(df.index):
            df = df[~df.index.duplicated(keep='first')]
        filtered = df.reindex(path_column)
        for name, column in filtered.iteritems():
            data = column.astype(str).values
            val_map, vals, var_type = guess_data_type(data)
            values, variable = sanitize_variable(val_map,
                                                 vals,
                                                 data,
                                                 var_type, {},
                                                 name=get_unique_names(
                                                     corpus.domain, name))
            corpus = corpus.add_column(variable, values, to_metas=True)
        return corpus
Example #14
    def _send_data(self):
        if self.partition is None or self.data is None:
            return
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        indices = np.argsort(counts)[::-1]
        index_map = {n: o for n, o in zip(indices, range(len(indices)))}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_unique_names(domain, 'Cluster'),
            values=[
                'C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))
            ])

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition
        self.Outputs.annotated_data.send(new_table)

        if Graph is not None:
            graph = Graph(self.graph)
            graph.set_items(new_table)
            self.Outputs.graph.send(graph)
Example #15
    def _send_output(self) -> None:
        """
        Create corpus with scores and output it
        """
        if self.corpus is None:
            self.Outputs.corpus.send(None)
            self.Outputs.selected_documents.send(None)
            return

        scores, labels = self._gather_scores()
        if labels:
            d = self.corpus.domain
            domain = Domain(
                d.attributes,
                d.class_var,
                metas=d.metas + tuple(
                    ContinuousVariable(get_unique_names(d, l))
                    for l in labels),
            )
            out_corpus = Corpus(
                domain,
                self.corpus.X,
                self.corpus.Y,
                np.hstack([self.corpus.metas, scores]),
            )
            Corpus.retain_preprocessing(self.corpus, out_corpus)
        else:
            out_corpus = self.corpus

        self.Outputs.corpus.send(
            create_annotated_table(out_corpus, self.selected_rows))
        self.Outputs.selected_documents.send(
            out_corpus[self.selected_rows] if self.selected_rows else None)
Example #16
 def test_get_unique_names(self):
     names = [
         "charlie", "bravo", "charlie (2)", "charlie (3)", "bravo (2)",
         "charlie (4)", "bravo (3)"
     ]
     self.assertEqual(get_unique_names(names, ["bravo", "charlie"]),
                      ["bravo (5)", "charlie (5)"])
Example #17
 def _get_projection_variables(self):
     domain = self.data.domain
     names = get_unique_names(
         [v.name for v in domain.variables + domain.metas],
         self.embedding_variables_names
     )
     return ContinuousVariable(names[0]), ContinuousVariable(names[1])
Example #18
    def commit(self):
        self.infolabel.setText(' ')

        if self.method == 0:
            alg = cd.label_propagation
            kwargs = {'iterations': self.iterations}

        elif self.method == 1:
            alg = cd.label_propagation_hop_attenuation
            kwargs = {
                'iterations': self.iterations,
                'delta': self.hop_attenuation
            }

        if self.net is None:
            self.Outputs.items.send(None)
            self.Outputs.network.send(None)
            return

        labels = alg(self.net, **kwargs)
        domain = self.net.nodes.domain
        cd.add_results_to_items(self.net, labels,
                                get_unique_names(domain, 'Cluster'))

        self.infolabel.setText('%d clusters found' % len(set(labels.values())))
        self.Outputs.items.send(self.net.nodes)
        self.Outputs.network.send(self.net)
Example #19
 def __call__(self, data):
     common = _IntegrateCommon(data.domain)
     atts = []
     if self.limits:
         methods = self.methods
         if not isinstance(methods, Iterable):
             methods = [methods] * len(self.limits)
         names = self.names
         if not names:
             names = [" - ".join("{0}".format(e) for e in l) for l in self.limits]
         # no names in data should be repeated
         used_names = [var.name for var in data.domain.variables + data.domain.metas]
         for i, n in enumerate(names):
             n = get_unique_names(used_names, n)
             names[i] = n
             used_names.append(n)
         for limits, method, name in zip(self.limits, methods, names):
             atts.append(Orange.data.ContinuousVariable(
                 name=name,
                 compute_value=method(limits, common)))
     if not self.metas:
         domain = Orange.data.Domain(atts, data.domain.class_vars,
                                     metas=data.domain.metas)
     else:
         domain = Orange.data.Domain(data.domain.attributes, data.domain.class_vars,
                                     metas=data.domain.metas + tuple(atts))
     return data.from_table(domain, data)
Example #20
    def _transformed_domain(self):
        dom = self.data.domain
        orig_coords = (self.attr_lat, self.attr_lon)

        names = [var.name for var in orig_coords]
        if not self.replace_original:
            # If appending, use the same names, just with numbers for uniqueness
            existing = [v.name for v in chain(dom.variables, dom.metas)]
            names = get_unique_names(existing, names)

        transformer = Transformer.from_crs(self.EPSG_CODES[self.from_idx],
                                           self.EPSG_CODES[self.to_idx])
        transformer_common = GeoTransformerCommon(transformer, *orig_coords)
        coords = (ContinuousVariable(name,
                                     compute_value=GeoTransformer(
                                         transformer_common, col))
                  for col, name in enumerate(names))

        if self.replace_original:
            tr = dict(zip(orig_coords, coords))

            def r(variables):
                return [tr.get(var, var) for var in variables]

            return Domain(r(dom.attributes), r(dom.class_vars), r(dom.metas))

        # Put each new variable in attributes, if it was there, else to metas
        attrs, metas = list(dom.attributes), list(dom.metas)
        for orig, new in zip(orig_coords, coords):
            (attrs if orig in dom.attributes else metas).append(new)
        return Domain(attrs, dom.class_vars, metas)
Example #21
    def _send_data(self):
        if self.partition is None or self.data is None:
            return
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        indices = np.argsort(counts)[::-1]
        index_map = {n: o for n, o in zip(indices, range(len(indices)))}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=[
                "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition))
            ])

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition

        summary = len(new_table) if new_table else self.info.NoOutput
        details = format_summary_details(new_table) if new_table else ""
        self.info.set_output_summary(summary, details)

        self.Outputs.annotated_data.send(new_table)

        if Network is not None:
            n_edges = self.graph.number_of_edges()
            edges = sp.coo_matrix(
                (np.ones(n_edges), np.array(self.graph.edges()).T),
                shape=(n_edges, n_edges))
            graph = Network(new_table, edges)
            self.Outputs.graph.send(graph)
Example #22
    def _commit_predictions(self):
        if not self.data:
            self.Outputs.predictions.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(slot, newmetas,
                                                     newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [
            var.name
            for var in chain(attrs, self.data.domain.class_vars, metas) if var
        ]
        uniq_newmetas = []
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        index = self.dataview.model().index
        map_to = self.dataview.model().mapToSource
        assert self.selection_store is not None
        rows = None
        if self.selection_store.rows:
            rows = [
                ind.row()
                for ind in self.dataview.selectionModel().selectedRows(0)
            ]
            rows.sort()
        elif self.dataview.model().isSorted() \
                or self.predictionsview.model().isSorted():
            rows = list(range(len(self.data)))
        if rows:
            source_rows = [map_to(index(row, 0)).row() for row in rows]
            predictions = predictions[source_rows]
        self.Outputs.predictions.send(predictions)

        summary = str(len(predictions))
        details = format_summary_details(predictions)
        self.info.set_output_summary(summary, details)
Example #23
 def create_unique_vars(self, names, proposed_names, values=()):
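     # Grow `names` as variables are created so that later proposals also
     # avoid the names just assigned.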
     unique_vars = []
     for proposed in proposed_names:
         uniq = get_unique_names(names, proposed)
         if values:
             unique_vars.append(DiscreteVariable(uniq, values))
         else:
             unique_vars.append(ContinuousVariable(uniq))
         names.append(uniq)
     return unique_vars, names
Example #24
 def _data_with_similarity(self, indices):
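     # Return the rows at `indices` with an extra "distance" meta column
     # holding self.distances, named so it cannot clash with existing variables.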
     data = self.data
     varname = get_unique_names(data.domain, "distance")
     metas = data.domain.metas + (ContinuousVariable(varname), )
     domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
     data_metas = self.distances[indices].reshape((-1, 1))
     if data.domain.metas:
         data_metas = np.hstack((data.metas[indices], data_metas))
     neighbors = Table(domain, data.X[indices], data.Y[indices], data_metas)
     neighbors.attributes = self.data.attributes
     return neighbors
Example #25
 def send_components(self):
     components = None
     if self.data is not None and self.projection is not None:
         proposed = [var.name for var in self.effective_variables]
         comp_name = get_unique_names(proposed, 'component')
         meta_attrs = [StringVariable(name=comp_name)]
         domain = Domain(self.effective_variables, metas=meta_attrs)
         components = Table(domain, self._send_components_x(),
                            metas=self._send_components_metas())
         components.name = "components"
     self.Outputs.components.send(components)
Example #26
    def apply(self):
        builtin_warn = warnings.warn

        def _handle_disconnected_graph_warning(msg, *args, **kwargs):
            if msg.startswith("Graph is not fully connected"):
                self.Warning.graph_not_connected()
            else:
                builtin_warn(msg, *args, **kwargs)

        out = None
        data = self.data
        method = self.MANIFOLD_METHODS[self.manifold_method_index]
        have_data = data is not None and len(data)
        self.Error.clear()
        self.Warning.clear()

        if have_data and data.is_sparse():
            self.Error.sparse_not_supported()
        elif have_data:
            names = [
                var.name
                for var in chain(data.domain.class_vars, data.domain.metas)
                if var
            ]
            proposed = ["C{}".format(i) for i in range(self.n_components)]
            unique = get_unique_names(names, proposed)
            domain = Domain([ContinuousVariable(name) for name in unique],
                            data.domain.class_vars, data.domain.metas)
            try:
                warnings.warn = _handle_disconnected_graph_warning
                projector = method(**self.get_method_parameters(data, method))
                model = projector(data)
                if isinstance(model, TSNEModel):
                    out = model.embedding
                else:
                    X = model.embedding_
                    out = Table(domain, X, data.Y, data.metas)
            except ValueError as e:
                if e.args[0] == "for method='hessian', n_neighbors " \
                                "must be greater than [n_components" \
                                " * (n_components + 3) / 2]":
                    n = self.n_components * (self.n_components + 3) / 2
                    self.Error.n_neighbors_too_small("{}".format(n))
                else:
                    self.Error.manifold_error(e.args[0])
            except MemoryError:
                self.Error.out_of_memory()
            except np.linalg.linalg.LinAlgError as e:
                self.Error.manifold_error(str(e))
            finally:
                warnings.warn = builtin_warn

        self._set_output_summary(out)
        self.Outputs.transformed_data.send(out)
Example #27
def _table_with_annotation_column(data, values, column_data, var_name):
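    # The annotation becomes the class variable when the data has none;
    # otherwise it is appended to the metas.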
    var = DiscreteVariable(get_unique_names(data.domain, var_name), values)
    class_vars, metas = data.domain.class_vars, data.domain.metas
    if not data.domain.class_vars:
        class_vars += (var, )
    else:
        metas += (var, )
    domain = Domain(data.domain.attributes, class_vars, metas)
    table = data.transform(domain)
    table[:, var] = column_data.reshape((len(data), 1))
    return table
Example #28
    def _fit_model(self, data: Table) -> EllipticEnvelopeClassifier:
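        # Fit on the attributes only, then build a "Mahalanobis" variable
        # (name made unique) whose compute_value applies the fitted model
        # to new data.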
        domain = data.domain
        model = super()._fit_model(data.transform(Domain(domain.attributes)))

        transformer = _Transformer(model)
        names = [v.name for v in domain.variables + domain.metas]
        variable = ContinuousVariable(get_unique_names(names, "Mahalanobis"),
                                      compute_value=transformer)

        transformer.variable = variable
        model.mahal_var = variable
        return model
Example #29
    def _commit_predictions(self):
        if not self.data:
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(slot, newmetas,
                                                     newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [
            var.name
            for var in chain(attrs, self.data.domain.class_vars, metas) if var
        ]
        uniq_newmetas = []
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            with predictions.unlocked(predictions.metas):
                predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        assert datamodel is not None  # because we have data
        assert self.selection_store is not None
        rows = numpy.array(list(self.selection_store.rows))
        if rows.size:
            # Reorder rows as they are ordered in view
            shown_rows = datamodel.mapFromSourceRows(rows)
            rows = rows[numpy.argsort(shown_rows)]
            predictions = predictions[rows]
        elif datamodel.sortColumn() >= 0 \
                or predmodel is not None and predmodel.sortColumn() > 0:
            # No selection: output all, but in the shown order
            predictions = predictions[datamodel.mapToSourceRows(...)]
        self.Outputs.predictions.send(predictions)
Example #30
    def _fit_model(self, data: Table) -> _OutlierModel:
        domain = data.domain
        model = super()._fit_model(data.transform(Domain(domain.attributes)))

        transformer = _Transformer(model)
        names = [v.name for v in domain.variables + domain.metas]
        variable = DiscreteVariable(get_unique_names(names, "Outlier"),
                                    values=("Yes", "No"),
                                    compute_value=transformer)

        transformer.variable = variable
        model.outlier_var = variable
        return model
Example #31
    def send_data(self):
        if self.optimize_k:
            row = self.selected_row()
            k = self.k_from + row if row is not None else None
        else:
            k = self.k

        km = self.clusterings.get(k)
        if self.data is None or km is None or isinstance(km, str):
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=["C%d" % (x + 1) for x in range(km.k)]
        )
        clust_ids = km(self.data)
        silhouette_var = ContinuousVariable(
            get_unique_names(domain, "Silhouette"))
        if km.silhouette_samples is not None:
            self.Warning.no_silhouettes.clear()
            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        else:
            self.Warning.no_silhouettes()
            scores = np.nan

        new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = clust_ids.X.ravel()
        new_table.get_column_view(silhouette_var)[0][:] = scores

        centroids = Table(Domain(km.pre_domain.attributes), km.centroids)

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)
Example #32
    def _send_data(self):
        if self.partition is None or self.data is None:
            return
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        indices = np.argsort(counts)[::-1]
        index_map = {n: o for n, o in zip(indices, range(len(indices)))}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_unique_names(domain, 'Cluster'),
            values=['C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))]
        )

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition
        self.Outputs.annotated_data.send(new_table)

        if Graph is not None:
            graph = Graph(self.graph)
            graph.set_items(new_table)
            self.Outputs.graph.send(graph)
Example #33
 def __get_var_names(self):
     n = self.n_components
     postfixes = ["-x", "-y"] if n == 2 else [str(i) for i in range(n)]
     names = [f"{self.var_prefix}{postfix}" for postfix in postfixes]
     domain = self.orig_domain.variables + self.orig_domain.metas
     return get_unique_names([v.name for v in domain], names)
Example #34
 def _get_var_names(self, n):
     postfixes = ["x", "y"] if n == 2 else [str(i) for i in range(1, n + 1)]
     names = [f"{self.var_prefix}-{postfix}" for postfix in postfixes]
     return get_unique_names(self.orig_domain, names)
Example #35
 def test_get_unique_names(self):
     names = ["charlie", "bravo", "charlie (2)", "charlie (3)", "bravo (2)", "charlie (4)",
              "bravo (3)"]
     self.assertEqual(get_unique_names(names, ["bravo", "charlie"]),
                      ["bravo (5)", "charlie (5)"])
Example #36
 def _get_projection_variables(self):
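     # Passing the domain itself lets get_unique_names gather the existing
     # names; Example #17 builds the same list by hand.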
     names = get_unique_names(
         self.data.domain, self.embedding_variables_names)
     return ContinuousVariable(names[0]), ContinuousVariable(names[1])
Example #37
    def send_data(self):
        if self.optimize_k:
            row = self.selected_row()
            k = self.k_from + row if row is not None else None
        else:
            k = self.k

        km = self.clusterings.get(k)
        if self.data is None or km is None or isinstance(km, str):
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=["C%d" % (x + 1) for x in range(km.k)]
        )
        clust_ids = km(self.data)
        clust_col = clust_ids.X.ravel()
        silhouette_var = ContinuousVariable(
            get_unique_names(domain, "Silhouette"))
        if km.silhouette_samples is not None:
            self.Warning.no_silhouettes.clear()
            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
            clust_scores = []
            for i in range(km.k):
                in_clust = clust_col == i
                if in_clust.any():
                    clust_scores.append(np.mean(scores[in_clust]))
                else:
                    clust_scores.append(0.)
            clust_scores = np.atleast_2d(clust_scores).T
        else:
            self.Warning.no_silhouettes()
            scores = np.nan
            clust_scores = np.full((km.k, 1), np.nan)

        new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = clust_col
        new_table.get_column_view(silhouette_var)[0][:] = scores

        centroid_attributes = [
            attr.compute_value.variable
            if isinstance(attr.compute_value, ReplaceUnknowns)
            and attr.compute_value.variable in domain.attributes
            else attr
            for attr in km.pre_domain.attributes]
        centroid_domain = add_columns(
            Domain(centroid_attributes, [], domain.metas),
            metas=[cluster_var, silhouette_var])
        centroids = Table(
            centroid_domain, km.centroids, None,
            np.hstack((np.full((km.k, len(domain.metas)), np.nan),
                       np.arange(km.k).reshape(km.k, 1),
                       clust_scores))
        )
        if self.data.name == Table.name:
            centroids.name = "centroids"
        else:
            centroids.name = f"{self.data.name} centroids"

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)
Example #38
 def _get_var_names(self, n):
     names = [f"{self.var_prefix}{postfix}" for postfix in range(1, n + 1)]
     return get_unique_names(self.orig_domain, names)
Example #39
    def test_get_unique_names_with_domain(self):
        a, b, c, d = map(ContinuousVariable, ["foo", "bar", "baz", "baz (3)"])
        domain = Domain([a, b], c, [d])
        self.assertEqual(get_unique_names(domain, ["qux"]), ["qux"])
        self.assertEqual(get_unique_names(domain, ["foo"]), ["foo (1)"])
        self.assertEqual(get_unique_names(domain, ["baz"]), ["baz (4)"])
        self.assertEqual(get_unique_names(domain, ["baz (3)"]), ["baz (3) (1)"])
        self.assertEqual(
            get_unique_names(domain, ["qux", "quux"]), ["qux", "quux"])
        self.assertEqual(
            get_unique_names(domain, ["bar", "baz"]), ["bar (4)", "baz (4)"])
        self.assertEqual(
            get_unique_names(domain, ["qux", "baz"]), ["qux (4)", "baz (4)"])
        self.assertEqual(
            get_unique_names(domain, ["qux", "bar"]), ["qux (1)", "bar (1)"])

        self.assertEqual(get_unique_names(domain, "qux"), "qux")
        self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
        self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")
Example #40
    def test_get_unique_names(self):
        names = ["foo", "bar", "baz", "baz (3)"]
        self.assertEqual(get_unique_names(names, ["qux"]), ["qux"])
        self.assertEqual(get_unique_names(names, ["foo"]), ["foo (1)"])
        self.assertEqual(get_unique_names(names, ["baz"]), ["baz (4)"])
        self.assertEqual(get_unique_names(names, ["baz (3)"]), ["baz (3) (1)"])
        self.assertEqual(
            get_unique_names(names, ["qux", "quux"]), ["qux", "quux"])
        self.assertEqual(
            get_unique_names(names, ["bar", "baz"]), ["bar (4)", "baz (4)"])
        self.assertEqual(
            get_unique_names(names, ["qux", "baz"]), ["qux (4)", "baz (4)"])
        self.assertEqual(
            get_unique_names(names, ["qux", "bar"]), ["qux (1)", "bar (1)"])

        self.assertEqual(get_unique_names(names, "qux"), "qux")
        self.assertEqual(get_unique_names(names, "foo"), "foo (1)")
        self.assertEqual(get_unique_names(names, "baz"), "baz (4)")

        self.assertEqual(get_unique_names(tuple(names), "baz"), "baz (4)")
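
Read together, these tests pin down the behaviour the widgets above rely on. A quick interactive recap, mirroring the assertions and assuming the Orange 3 import path:

>>> from Orange.data.util import get_unique_names
>>> names = ["foo", "bar", "baz", "baz (3)"]
>>> get_unique_names(names, "qux")           # no clash: proposal kept as-is
'qux'
>>> get_unique_names(names, "baz")           # clash: numbered past the highest existing "baz (n)"
'baz (4)'
>>> get_unique_names(names, ["qux", "bar"])  # batch call: here both proposals get index 1
['qux (1)', 'bar (1)']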