Пример #1
0
    def capture_image(self):
        """Grab a frame from ``self.cap``, save it to disk and send it out.

        Up to three frames are read and the last one is used.  If any read
        fails, the ``no_webcam`` error is raised and the method returns
        without producing output.  Otherwise two PNG files are written into
        ``self.IMAGE_DIR`` (the full frame and the result of
        ``self.clip_aspect_frame``), and for each a one-row table with the
        image title and the file path as metas is sent to the matching output.

        Side effects: ``self.image_title`` is reset to ``''`` and
        ``self.snapshot_flash`` is set to 80.
        """
        cap = self.cap
        for i in range(3):  # Need some warmup time; use the last frame
            success, frame = cap.read()
            if success:
                self.Error.no_webcam.clear()
            else:
                self.Error.no_webcam()
                return

        def normalize(name):
            # Replace spaces with underscores, decompose to NFD and keep only
            # characters whose Unicode category code occurs in the string
            # 'LuLlPcPd' (Lu, Ll, Pc, Pd).
            return ''.join(ch for ch in unicodedata.normalize('NFD', name.replace(' ', '_'))
                           if unicodedata.category(ch) in 'LuLlPcPd')

        timestamp = datetime.now().strftime('%Y%m%d%H%M%S.%f')
        # Take the current title (or the default) and clear the stored title
        # in the same statement.
        image_title, self.image_title = self.image_title or self.DEFAULT_TITLE, ''
        normed_name = normalize(image_title)

        for image, suffix, output in (
                (frame, '', self.Output.SNAPSHOT),
                (self.clip_aspect_frame(frame), '_aspect', self.Output.SNAPSHOT_ASPECT)):
            # NOTE: the file name template is filled from locals(), so the
            # local names normed_name, timestamp and suffix must not be renamed.
            path = os.path.join(
                self.IMAGE_DIR, '{normed_name}_{timestamp}{suffix}.png'.format(**locals()))
            cv2.imwrite(path,
                        # imwrite expects original bgr image, so this is reversed
                        self.bgr2rgb(image) if self.avatar_filter else image)

            # One-row table without features; metas hold the title and the
            # path, the latter marked as an image-typed string variable.
            image_var = StringVariable('image')
            image_var.attributes['type'] = 'image'
            table = Table.from_numpy(Domain([], metas=[StringVariable('name'), image_var]),
                                     np.empty((1, 0)), metas=np.array([[image_title, path]]))
            self.send(output, table)

        self.snapshot_flash = 80
Пример #2
0
    def test_nyt_corpus_domain_generation(self):
        """The queried corpus has one meta per NYT text field plus two extras."""
        corpus = self.nyt.run_query('slovenia')
        expected_names = list(NYT_TEXT_FIELDS) + ['pub_date', 'country']
        meta_vars = [StringVariable.make(name) for name in expected_names]

        self.assertEqual(len(meta_vars), len(corpus.domain.metas))
        self.assertEqual(len(corpus.Y), 10)
Пример #3
0
def concatenate_data(tables, filenames, label):
    """Concatenate several tables into one and tag rows with file name and label.

    Each element of ``tables`` is either a ``Table`` or an indexable item
    whose element ``[2]`` is a ``Table``; for the latter, element ``[0]`` is
    looked up among the x values returned by ``domain_union_for_spectra`` and
    element ``[1]`` is written into the matching columns of the result
    (presumably wavenumbers and the spectral matrix -- confirm with callers).

    :param tables: tables (or indexable items wrapping a table) to join
    :param filenames: one name per table, stored in the "Filename" meta
    :param label: single value stored in the "Label" meta of every row
    :return: the concatenated table in the extended domain
    """
    domain, xs = domain_union_for_spectra(tables)
    # Bring every table into the common domain before stacking the rows.
    ntables = [(table if isinstance(table, Table) else table[2]).transform(domain)
               for table in tables]
    data = type(ntables[0]).concatenate(ntables, axis=0)
    source_var = StringVariable.make("Filename")
    label_var = StringVariable.make("Label")

    # add other variables
    # One continuous attribute per x value, placed before the existing ones.
    xs_atts = tuple([ContinuousVariable.make("%f" % f) for f in xs])
    domain = Domain(xs_atts + domain.attributes, domain.class_vars,
                    domain.metas + (source_var, label_var))
    data = data.transform(domain)

    # fill in spectral data
    # xs_sind maps positions in the sorted x values back to column indices.
    xs_sind = np.argsort(xs)
    xs_sorted = xs[xs_sind]
    pos = 0  # row offset of the current table inside ``data``
    for table in tables:
        t = table if isinstance(table, Table) else table[2]
        if not isinstance(table, Table):
            indices = xs_sind[np.searchsorted(xs_sorted, table[0])]
            data.X[pos:pos+len(t), indices] = table[1]
        pos += len(t)

    # Repeat each file name once per row of its table.
    data[:, source_var] = np.array(list(
        chain(*(repeat(fn, len(table))
                for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    # Same row count as above, but every row gets the same label.
    data[:, label_var] = np.array(list(
        chain(*(repeat(label, len(table))
                for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    return data
Пример #4
0
    def test_val(self):
        """Check value conversion and string rendering of a StringVariable."""
        var = StringVariable("a")
        self.assertEqual(var.to_val(None), "")

        unknown_text = var.str_val(Unknown)
        self.assertEqual(unknown_text, "?")

        none_text = var.str_val(Value(var, None))
        self.assertEqual(none_text, "None")

        quoted_text = var.repr_val(Value(var, "foo"))
        self.assertEqual(quoted_text, '"foo"')
Пример #5
0
    def test_to_val(self):
        """Check ``to_val`` for string, continuous and base variables."""
        text_var = StringVariable("x")
        for raw, expected in (("foo", "foo"), (42, "42")):
            self.assertEqual(text_var.to_val(raw), expected)

        numeric_var = ContinuousVariable("x")
        missing = numeric_var.to_val("?")
        self.assertTrue(math.isnan(missing))
        self.assertTrue(math.isnan(Unknown))

        base_var = Variable("x")
        self.assertEqual(base_var.to_val("x"), "x")
Пример #6
0
def create_domain(at, cl, metas):
    """Create a domain from attributes, class and metas.

    When ``OR3`` is set the metas are passed straight to the ``Domain``
    constructor; otherwise the domain is built from ``at`` and ``cl`` and
    the metas are registered afterwards under freshly generated meta ids.
    """
    if OR3:
        return Orange.data.Domain(at, cl, metas=metas)

    domain = Orange.data.Domain(at, cl)
    if not metas:
        return domain

    if isinstance(metas, dict):
        pairs = sorted(metas.items())
    else:
        pairs = zip([StringVariable.new_meta_id() for _ in metas], metas)
    # Only the second element of each pair is used; every meta is stored
    # under a newly generated id.
    domain.add_metas(dict((StringVariable.new_meta_id(), meta)
                          for _, meta in pairs))
    return domain
Пример #7
0
    def test_proxy_has_separate_attributes(self):
        """A variable and its (nested) proxies keep independent attributes."""
        original = StringVariable("image")
        first_proxy = original.make_proxy()
        second_proxy = first_proxy.make_proxy()

        expected = (
            (original, "a"),
            (first_proxy, "b"),
            (second_proxy, "c"),
        )
        for var, origin in expected:
            var.attributes["origin"] = origin

        for var, origin in expected:
            self.assertEqual(var.attributes["origin"], origin)
Пример #8
0
def parse_record_json(record, includes_metadata):
    """
    Parses the JSON representation of the record returned by the New York Times Article API.

    :param record: The JSON representation of the query's results; the
        documents are read from ``record["response"]["docs"]``.
    :param includes_metadata: Flags, parallel to the text fields (headline,
        lead_paragraph, snippet, abstract, keywords), that determine which
        fields to include.
    :return: A 4-tuple ``(documents, metadata, meta_vars, class_values)``:
        the concatenated text of every article, an object array with one
        row of meta values per article, the meta variables describing the
        columns of that array, and the section name of every article.
    """
    text_fields = ["headline", "lead_paragraph", "snippet", "abstract", "keywords"]
    selected_fields = [field
                       for field, flag in zip(text_fields, includes_metadata)
                       if flag]

    meta_vars = [StringVariable.make(field) for field in selected_fields]
    # Also add pub_date and glocation.
    meta_vars += [StringVariable.make("pub_date"), StringVariable.make("country")]

    documents = []
    class_values = []
    rows = []
    for doc in record["response"]["docs"]:
        field_texts = []
        for field in selected_fields:
            value = doc.get(field)
            if isinstance(value, dict):
                text = " ".join([val for val in value.values() if val])
            elif isinstance(value, list):
                text = " ".join([kw["value"] for kw in value if kw])
            else:
                # Missing or empty fields still get a (blank) cell, so every
                # row has exactly one value per meta variable.
                text = value or ""
            field_texts.append(text)

        # Add the pub_date.
        pub_date = doc.get("pub_date") or ""
        # Add the glocation; documents without keywords get an empty value.
        keywords = doc.get("keywords") or []
        country = ",".join([kw["value"] for kw in keywords
                            if kw["name"] == "glocations"])

        # The field texts are joined without a separator, as before.
        documents.append("".join(field_texts))
        # Add the section_name.
        class_values.append(doc.get("section_name") or "")
        rows.append(field_texts + [pub_date, country])

    # Build the array once instead of re-stacking it for every document.
    metadata = np.empty((len(rows), len(meta_vars)), dtype=object)
    if rows:
        metadata[:] = rows
    return documents, metadata, meta_vars, class_values
Пример #9
0
    def _create_corpus(self):
        """Build a corpus from ``self._text_data``.

        Each item contributes its name, path and content as metas.  The
        category becomes the class variable only when more than one distinct
        category is present.  Returns ``None`` when there are no items.
        """
        text_categories = list(set(t.category for t in self._text_data))
        values = list(set(text_categories))
        category_var = DiscreteVariable.make("category", values=values)

        rows = [[item.name, item.path, item.content]
                for item in self._text_data]
        category_data = [category_var.to_val(item.category)
                         for item in self._text_data]

        if len(text_categories) > 1:
            class_vars = category_var
            category_data = np.array(category_data)
        else:
            # A single category carries no information; drop the class.
            class_vars = []
            category_data = np.empty((len(rows), 0))

        meta_vars = [StringVariable.make(name)
                     for name in ("name", "path", "content")]
        domain = Domain([], class_vars, meta_vars)
        domain["name"].attributes["title"] = True

        metas = np.array(rows, dtype=object)
        if not len(metas):
            return None
        return Corpus(domain,
                      Y=category_data,
                      metas=metas,
                      text_features=[domain.metas[2]])
Пример #10
0
def _corpus_from_records(records, includes_metadata):
    """Turn PubMed records into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): Pairs whose first element is the name of
            a field to include; the second element is not used here.

    Returns:
        corpus: The output Corpus, with ``section_name`` as the class.
    """
    meta_values, class_values = _records_to_corpus_entries(
        records, includes_metadata=includes_metadata)

    # The publication date is a time variable; everything else is a string.
    meta_vars = [
        TimeVariable(field_name) if field_name == 'pub_date'
        else StringVariable.make(field_name)
        for field_name, _ in includes_metadata
    ]

    section_var = DiscreteVariable(
        'section_name', values=list(set(class_values)))
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    encoded = [section_var.to_val(value) for value in class_values]
    Y = np.array(encoded)[:, None]

    return Corpus(domain=domain, Y=Y, metas=meta_values)
Пример #11
0
 def generate_corpus(self, url_list):
     """
     Generate a new corpus with the fields requested by the user and send it.

     Shows a warning and returns without sending anything when no text
     field is selected.

     :param url_list: URLs whose information is fetched with ``_get_info``
     :return: None; the corpus is stored in ``self.output_corpus`` and sent
     """
     selected = [self.includes_article, self.includes_author, self.includes_date,
                 self.includes_title, self.includes_web_url]
     if True not in selected:
         self.warning(1, "You must select at least one text field.")
         return

     meta_vars = [StringVariable.make(field)
                  for wanted, field in zip(selected, ARTICLE_TEXT_FIELDS)
                  if wanted]

     rows = []
     for url in url_list:
         info, _ = _get_info(url)
         rows.append([value for wanted, value in zip(selected, info) if wanted])
     metadata = np.array(rows, dtype=object)

     domain = Domain([], class_vars=None, metas=(meta_vars))
     self.output_corpus = Corpus(None, None, metadata, domain, meta_vars)
     self.send("Corpus", self.output_corpus)
Пример #12
0
def create_domain(at, cl, metas):
    """Create a domain; without ``OR3`` the metas are added under new meta ids."""
    if OR3:
        return Orange.data.Domain(at, cl, metas=metas)

    domain = Orange.data.Domain(at, cl)
    if metas:
        by_id = {}
        for meta in metas:
            by_id[StringVariable.new_meta_id()] = meta
        domain.add_metas(by_id)
    return domain
Пример #13
0
def create_domain(at, cl, metas):
    """Create a domain; without ``OR3`` the metas are added in reverse order."""
    if OR3:
        return Orange.data.Domain(at, cl, metas=metas)

    domain = Orange.data.Domain(at, cl)
    if not metas:
        return domain

    # add metas in the reverse order (because meta ids are always decreasing)
    # this allows us to pass metas in the same order to create_table
    meta_ids = [StringVariable.new_meta_id() for _ in metas]
    domain.add_metas(dict(zip(meta_ids, reversed(metas))))
    return domain
Пример #14
0
def _generate_corpus(records, required_text_fields):
    """
    Build a corpus from the input NYT records.

    :param records: The input NYT records.
    :type records: list
    :param required_text_fields: The NYT text fields to include as metas.
    :type required_text_fields: list
    :return: :class: `orangecontrib.text.corpus.Corpus`
    """
    metas, class_values = _parse_record_json(records, required_text_fields)

    # A document is the space-joined text of its non-missing meta values.
    documents = [
        " ".join([value for value in row if value is not None]).strip()
        for row in metas
    ]

    # Create domain.
    field_names = list(required_text_fields) + ["pub_date", "country"]
    meta_vars = [StringVariable.make(name) for name in field_names]
    section_var = DiscreteVariable("section_name", values=list(set(class_values)))
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    Y = np.array([section_var.to_val(value) for value in class_values])[:, None]

    return Corpus(documents, None, Y, metas, domain)
Пример #15
0
    def test_asserting_errors(self):
        """Invalid constructor arguments and text features raise errors."""
        corpus = Corpus.from_file('book-excerpts')

        # First positional argument of the wrong type.
        with self.assertRaises(TypeError):
            Corpus(1.0, corpus.Y, corpus.metas, corpus.domain,
                   corpus.text_features)

        # X with twice as many rows as the other parts.
        doubled_x = np.vstack((corpus.X, corpus.X))
        with self.assertRaises(ValueError):
            Corpus(corpus.domain, doubled_x, corpus.Y, corpus.metas,
                   corpus.W, corpus.text_features)

        # A variable that is not in the domain.
        with self.assertRaises(ValueError):
            corpus.set_text_features([StringVariable('foobar')])

        # The same feature listed twice.
        with self.assertRaises(ValueError):
            corpus.set_text_features(
                [corpus.domain.metas[0], corpus.domain.metas[0]])
    def commit(self):
        """Send the target-correlation table, or None when it does not apply."""
        table = None
        has_continuous_target = (
            self.data is not None
            and self.correlations_type == 2
            and self.target_variable
            and self.target_variable.is_continuous
        )
        if has_continuous_target:
            pearson = ContinuousVariable.make("Pearson")
            spearman = ContinuousVariable.make("Spearman")
            row_name = StringVariable.make("Variable")

            domain = Orange.data.Domain([pearson, spearman], metas=[row_name])
            table = Orange.data.Table(domain, self.target_correlations)
            # Label every row with the name of the variable it describes.
            for instance, var_name in zip(table, self.var_names):
                instance[row_name] = var_name
        self.send("Correlations", table)
Пример #17
0
    def send_coefficients(self):
        """
        Send the logistic regression coefficients to the output, or None
        when there is no learner or it has no theta yet.
        """
        if self.learner is None or self.learner.theta is None:
            self.Outputs.coefficients.send(None)
            return

        coefficient_var = ContinuousVariable("Coefficients", number_of_decimals=7)
        domain = Domain([coefficient_var], metas=[StringVariable("Name")])
        names = ["theta 0", "theta 1"]
        rows = list(zip(list(self.learner.theta), names))

        self.Outputs.coefficients.send(Table(domain, rows))
Пример #18
0
def graph_to_table(G):
    """Builds a Data Table from node values.

    The node attribute dictionaries are written to a temporary tab-delimited
    file, which is then loaded as a ``Table``.  String values have tabs
    replaced by spaces; other values are converted with ``str`` and missing
    ones become ``'?'``.  A graph without nodes yields an empty table with
    a single string meta named "id".

    The temporary file is removed even when writing or loading fails.
    """
    if G.number_of_nodes() <= 0:
        return Table(Domain([], [], [StringVariable("id")]), [])

    nodes = list(G.node.values())
    features = list(set(itertools.chain.from_iterable(node.keys() for node in nodes)))
    data = [[node.get(f).replace('\t', ' ') if isinstance(node.get(f, 1), str) else str(node.get(f, '?'))
             for f in features]
            for node in nodes]

    fp = tempfile.NamedTemporaryFile('wt', suffix='.tab', delete=False)
    try:
        # Close the file before it is read back by Table.
        with fp:
            fp.write('\n'.join('\t'.join(line) for line in [features] + data))
        return Table(fp.name)
    finally:
        os.unlink(fp.name)
Пример #19
0
 def test_duplicate_names(self):
     """Metas that share a name across both inputs are renamed with a warning."""
     main_domain = Domain(
         [ContinuousVariable("C1")],
         metas=[DiscreteVariable("Feature", values=("A", "B"))])
     data = Table(main_domain, np.array([[1.], [0.]]),
                  metas=np.array([[1.], [0.]]))

     extra_domain = Domain(
         [ContinuousVariable("C1")],
         metas=[StringVariable("Feature")])
     extra_data = Table(extra_domain, np.array([[1.], [0.]]),
                        metas=np.array([["A"], ["B"]]))

     self.send_signal(self.widget.Inputs.data, data)
     self.send_signal(self.widget.Inputs.extra_data, extra_data)
     self.assertTrue(self.widget.Warning.renamed_vars.is_shown())

     merged = self.get_output(self.widget.Outputs.data)
     meta_names = [meta.name for meta in merged.domain.metas]
     self.assertListEqual(meta_names, ["Feature (1)", "Feature (2)"])
Пример #20
0
 def update_model(self):
     """Update the model and send its coefficients (None without a model)."""
     super().update_model()
     coeffs = None
     if self.model is not None:
         model_domain = self.model.domain
         if model_domain.class_var.is_discrete:
             coeffs = create_coef_table(self.model)
         else:
             domain = Domain([ContinuousVariable("coef")],
                             metas=[StringVariable("name")])
             # Intercept first, then one coefficient per attribute.
             cfs = list(self.model.intercept) + list(self.model.coefficients)
             names = ["intercept"]
             names += [attr.name for attr in model_domain.attributes]
             coeffs = Table.from_list(domain, list(zip(cfs, names)))
             coeffs.name = "coefficients"
     self.Outputs.coefficients.send(coeffs)
Пример #21
0
def create_domain(*ss):
    """Build a test domain from groups of short variable keys.

    Every positional argument is a sequence of keys (e.g. ``"age"``) and
    becomes one positional argument of ``Domain``.  Variable caches are
    cleared first.
    """
    Variable._clear_all_caches()
    by_key = {
        "age": ContinuousVariable(name="AGE"),
        "gender": DiscreteVariable(name="Gender", values=["M", "F"]),
        "incomeA": ContinuousVariable(name="incomeA"),
        "income": ContinuousVariable(name="income"),
        "education": DiscreteVariable(name="education", values=["GS", "HS", "C"]),
        "ssn": StringVariable(name="SSN"),
        "race": DiscreteVariable(name="race",
                                 values=["White", "Hypsanic", "African", "Other"]),
    }
    groups = [[by_key[key] for key in keys] for keys in ss]
    return Domain(*groups)
Пример #22
0
 def __getitem__(self, key):
     """Return the pixmap for ``key``, filling the mapping on first use.

     Keys that are not stored are resolved through ``vartype`` when they
     are variables, and to the fallback key -1 otherwise.
     """
     if not self:
         icon_specs = (
             (vartype(ContinuousVariable("c")), "N", (202, 0, 32)),
             (vartype(DiscreteVariable("d")), "C", (26, 150, 65)),
             (vartype(StringVariable("s")), "S", (0, 0, 0)),
             (vartype(TimeVariable("t")), "T", (68, 170, 255)),
             (-1, "?", (128, 128, 128)),
         )
         for type_key, letter, rgb in icon_specs:
             self[type_key] = createAttributePixmap(letter, QtGui.QColor(*rgb))

     if key not in self:
         if isinstance(key, Variable):
             key = vartype(key)
         else:
             key = -1
     return super().__getitem__(key)
Пример #23
0
    def test_labels(self):
        """Annotating by a string meta leaves the label colors unset."""
        x = ContinuousVariable("x")
        y = ContinuousVariable("y")
        s = StringVariable("s")
        rows = [
            [91.0, 89.0, "Bill"],
            [51.0, 100.0, "Cynthia"],
            [9.0, 61.0, "Demi"],
            [49.0, 92.0, "Fred"],
            [91.0, 49.0, "George"],
        ]
        grades = Table.from_list(Domain([x, y], [], [s]), rows)

        self.widget.set_distances(Euclidean(grades))

        combo = self.widget.annot_combo
        index = combo.model().indexOf(grades.domain.metas[0])
        combo.setCurrentIndex(index)
        combo.activated.emit(index)
        self.assertIsNone(self.widget.tablemodel.label_colors)
Пример #24
0
	def commit(self):
		"""Send the selected features and the table of ranked interactions.

		Both outputs receive None when there is no data or no discretized data.
		"""
		if self.data is None or self.disc_data is None:
			self.Outputs.features.send(None)
			self.Outputs.interactions.send(None)
			return

		domain = Domain(
			[ContinuousVariable("Interaction"), ContinuousVariable("Entropy Removed")],
			metas=[StringVariable("Feature 1"), StringVariable("Feature 2")])

		model = self.vizrank.rank_model
		score_roles = (InteractionRank.IntRole, InteractionRank.RemovedRole)
		score_rows = []
		name_rows = []
		for row in range(model.rowCount()):
			index = model.index(row, 0)
			score_rows.append([float(model.data(index, role)) for role in score_roles])
			name_rows.append([a.name for a in model.data(index, InteractionRank._AttrRole)])

		int_table = Table(domain, np.array(score_rows), metas=np.array(name_rows, dtype=object))
		int_table.name = "Interactions"

		# data has been imputed; send original attributes
		originals = [self.data.domain[var.name] for var in self.selection]
		self.Outputs.features.send(AttributeList(originals))
		self.Outputs.interactions.send(int_table)
Пример #25
0
 def test_preprocess(self):
     """``_preprocess`` keeps continuous attributes, classes and metas."""
     attributes = [
         ContinuousVariable("c"),
         DiscreteVariable("d", values=['a', 'b']),
     ]
     class_vars = [DiscreteVariable("cls", values=['e', 'f'])]
     metas = [StringVariable("m")]
     table = Table(Domain(attributes, class_vars, metas),
                   [[1, 'a', 'e', 'm1'], [2, 'b', 'f', 'm2']])

     result = _preprocess(table)

     np.testing.assert_equal(result.X, table.X[:, 0].reshape(2, 1))
     np.testing.assert_equal(result.Y, table.Y)
     np.testing.assert_equal(result.metas, table.metas)

     kept_names = [a.name for a in result.domain.attributes]
     continuous_names = [a.name for a in table.domain.attributes
                         if a.is_continuous]
     self.assertEqual(kept_names, continuous_names)
     self.assertEqual(result.domain.class_vars, table.domain.class_vars)
     self.assertEqual(result.domain.metas, table.domain.metas)
Пример #26
0
def create_domain(*ss):
    """Build a test domain from groups of short variable keys.

    Every positional argument is a sequence of keys (e.g. ``"age"``) and
    becomes one positional argument of ``Domain``.
    """
    by_key = {
        "age": ContinuousVariable(name="AGE"),
        "gender": DiscreteVariable(name="Gender", values=("M", "F")),
        "incomeA": ContinuousVariable(name="incomeA"),
        "income": ContinuousVariable(name="income"),
        "education": DiscreteVariable(name="education", values=("GS", "HS", "C")),
        "ssn": StringVariable(name="SSN"),
        "race": DiscreteVariable(name="race",
                                 values=("White", "Hypsanic", "African", "Other")),
        "arrival": TimeVariable("arrival"),
    }
    groups = [[by_key[key] for key in keys] for keys in ss]
    return Domain(*groups)
Пример #27
0
 def generate(self):
     """Generate a network of the selected type and send it.

     A ``ValueError`` from the generator is shown as a generation error
     and ``None`` is sent instead.
     """
     name, func, arg_names, *_ = self.GRAPH_TYPES[self.graph_type]
     values = tuple(self.arguments[_ctrl_name(name, arg)] for arg in arg_names)
     self.Error.generation_error.clear()
     try:
         network = func(*values)
     except ValueError as exc:
         self.Error.generation_error(exc)
         network = None
     else:
         # Replace the nodes with a table that has a single "id" meta
         # holding the node index.
         n = len(network.nodes)
         id_domain = Domain([], [], [StringVariable("id")])
         ids = np.arange(n).reshape((n, 1))
         network.nodes = Table(id_domain,
                               np.zeros((n, 0)), np.zeros((n, 0)), ids)
     self.Outputs.network.send(network)
Пример #28
0
    def test_value_string_substring_flags(self):
        """The transformer's flags are forwarded to ``map_by_substring``."""
        trans = ValueFromStringSubstring(StringVariable("x"), self.patterns)
        with patch('Orange.widgets.data.owcreateclass.map_by_substring') as mbs:

            def passed_flags():
                # Run the transformation and return the last two positional
                # arguments of the most recent mocked call.
                trans.transform(self.arr)
                return mbs.call_args[0][-2:]

            trans.case_sensitive = True
            case_sensitive, match_beginning = passed_flags()
            self.assertTrue(case_sensitive)
            self.assertFalse(match_beginning)

            trans.case_sensitive = False
            trans.match_beginning = True
            case_sensitive, match_beginning = passed_flags()
            self.assertFalse(case_sensitive)
            self.assertTrue(match_beginning)
Пример #29
0
    def setUp(self):
        """Create the widget and two small tables used by the tests."""
        #: OWAggregateColumns
        self.widget = self.create_widget(OWAggregateColumns)

        c1, c2, c3 = (ContinuousVariable(name) for name in ("c1", "c2", "c3"))
        t1, t2 = (TimeVariable(name) for name in ("t1", "t2"))
        d1, d2, d3 = (DiscreteVariable(name, values=("a", "b", "c"))
                      for name in ("d1", "d2", "d3"))
        s1 = StringVariable("s1")

        mixed_domain = Domain([c1, c2, d1, d2, t1], [d3], [s1, c3, t2])
        mixed_rows = [
            [0, 1, 0, 1, 2, 0, "foo", 0, 3],
            [3, 1, 0, 1, 42, 0, "bar", 0, 4],
        ]
        self.data1 = Table.from_list(mixed_domain, mixed_rows)

        single_domain = Domain([ContinuousVariable("c4")])
        self.data2 = Table.from_list(single_domain, [[4], [5]])
Пример #30
0
    def commit(self):
        """Send the data, the selected features and the correlation table.

        Without data or continuous data, the data is passed through and the
        other two outputs receive None.
        """
        if self.data is None or self.cont_data is None:
            self.Outputs.data.send(self.data)
            self.Outputs.features.send(None)
            self.Outputs.correlations.send(None)
            return

        domain = Domain(
            [ContinuousVariable("Correlation")],
            metas=[StringVariable("Feature 1"), StringVariable("Feature 2")])

        model = self.vizrank.rank_model
        scores = []
        name_pairs = []
        for row in range(model.rowCount()):
            index = model.index(row, 0)
            scores.append([float(model.data(index))])
            # pylint: disable=protected-access
            name_pairs.append(
                [a.name for a in model.data(index, CorrelationRank._AttrRole)])

        corr_table = Table(domain, np.array(scores),
                           metas=np.array(name_pairs, dtype=object))
        corr_table.name = "Correlations"

        self.Outputs.data.send(self.data)
        # data has been imputed; send original attributes
        originals = [attr.compute_value.variable for attr in self.selection]
        self.Outputs.features.send(AttributeList(originals))
        self.Outputs.correlations.send(corr_table)
Пример #31
0
 def test_num_meta_labels(self):
     """A numeric meta used as annotation is shown as text, '?' for NaN."""
     x = ContinuousVariable("x")
     y = ContinuousVariable("y")
     s = StringVariable("s")
     rows = [[0, 1, "a"],
             [1, np.nan, "b"]]
     data = Table.from_list(Domain([x], [], [y, s]), rows)

     self.widget.set_distances(Euclidean(data))

     combo = self.widget.annot_combo
     index = combo.model().indexOf(y)
     combo.setCurrentIndex(index)
     combo.activated.emit(index)
     self.assertEqual(self.widget.tablemodel.labels, ["1", "?"])
    def setUp(self):
        """Create the widget and tables with genes as attributes and as rows."""
        self.widget = self.create_widget(OWClusterAnalysis)

        iris = Table("iris")
        iris.attributes[GENE_AS_ATTRIBUTE_NAME] = True
        iris.attributes[GENE_ID_ATTRIBUTE] = NCBI_ID
        # Give every attribute a fake gene id equal to its position.
        for position, var in enumerate(iris.domain.attributes):
            var.attributes[NCBI_ID] = str(position)
        self.data_table = iris

        first_two = Domain(iris.domain.attributes[0:2])
        self.genes_as_attributes = iris.transform(first_two)

        gene_id_domain = Domain([], metas=[StringVariable("Gene ID")])
        genes_as_rows = Table.from_list(gene_id_domain, [["1"], ["2"]])
        genes_as_rows.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        genes_as_rows.attributes[GENE_ID_COLUMN] = "Gene ID"
        self.genes_as_rows = genes_as_rows
Пример #33
0
    def test_remove_discrete(self):
        """Discrete attributes are dropped, or moved to metas on request."""
        d1, d2, d3 = (DiscreteVariable(name, values=("1", "2", "3"))
                      for name in ("a", "b", "c"))
        c1 = ContinuousVariable("x")
        c2 = ContinuousVariable("y")
        t = StringVariable("t")
        data = Table.from_domain(Domain([d1, c1], d2, [c2, d3, t]), 5)

        cases = (
            ({}, (c2, d3, t)),
            ({"to_metas": True}, (c2, d3, t, d1)),
        )
        for kwargs, expected_metas in cases:
            reduced = distance.remove_discrete_features(data, **kwargs)
            self.assertEqual(reduced.domain.attributes, (c1, ))
            self.assertEqual(reduced.domain.class_var, d2)
            self.assertEqual(reduced.domain.metas, expected_metas)
Пример #34
0
    def test_variable_editor(self):
        """The editor mirrors the variable it is given and resets on None."""
        editor = VariableEditor()
        self.assertIs(editor.get_data(), None)

        var = StringVariable(name="S")
        var.attributes.update({"A": 1, "B": "b"})
        editor.set_data(var)

        self.assertEqual(editor.name_edit.text(), var.name)
        self.assertEqual(editor.labels_model.get_dict(), var.attributes)
        self.assertTrue(editor.is_same())

        # Clearing the data empties every control again.
        editor.set_data(None)
        self.assertEqual(editor.name_edit.text(), "")
        self.assertEqual(editor.labels_model.get_dict(), {})
        self.assertIs(editor.get_data(), None)
Пример #35
0
    def _to_addendum(self, df, keep):
        """Turn a data frame into meta values and matching meta variables.

        Returns ``(None, None)`` for a frame without columns.  The columns
        ``_id`` and ``adm0_a3`` are dropped from ``df`` in place.  All
        remaining columns are used when ``self.append_features`` is set,
        otherwise only those in ``keep``.
        """
        if not df.shape[1]:
            return None, None

        df.drop(['_id', 'adm0_a3'], axis=1, inplace=True)
        addendum = df if self.append_features else df[keep]

        numeric_columns = ('latitude', 'longitude')
        metas = []
        for col in addendum:
            unique_name = get_unique_names(self.data.domain, col)
            var_type = ContinuousVariable if col in numeric_columns else StringVariable
            metas.append(var_type(unique_name))

        return addendum.values, tuple(metas)
Пример #36
0
def create_coef_table(classifier):
    """Build a table of the classifier's intercept and coefficients.

    There is one column per class value (a single column when the
    coefficient matrix has at most two rows) and one row per attribute,
    preceded by an "intercept" row; row names are stored in the "name" meta.
    """
    intercept = classifier.intercept
    coefficients = classifier.coefficients
    class_values = classifier.domain.class_var.values
    used = classifier.used_vals[0]

    if coefficients.shape[0] > 2:
        values = [class_values[int(index)] for index in used]
    else:
        values = [class_values[int(used[1])]]

    columns = [ContinuousVariable(value, number_of_decimals=7)
               for value in values]
    domain = Domain(columns, metas=[StringVariable("name")])

    coefs = np.vstack((intercept.reshape(1, len(intercept)), coefficients.T))
    row_names = [["intercept"]]
    row_names += [[attr.name] for attr in classifier.domain.attributes]

    coef_table = Table.from_numpy(
        domain, X=coefs, metas=np.array(row_names, dtype=object))
    coef_table.name = "coefficients"
    return coef_table
Пример #37
0
    def commit(self):
        """Send the documents containing a selected word, and the words."""
        matching = None
        if self.corpus is not None:
            row_indices = []
            for index, doc in enumerate(self.corpus.ngrams):
                if any(word in doc for word in self.selected_words):
                    row_indices.append(index)
            matching = self.corpus[row_indices]
        self.Outputs.corpus.send(matching)

        words = list(self.selected_words)
        if words:
            words_domain = Domain([], metas=[StringVariable('Words')])
            topic = Topic.from_numpy(words_domain,
                                     X=np.empty((len(words), 0)),
                                     metas=np.c_[words].astype(object))
            topic.name = 'Selected Words'
        else:
            topic = None
        self.Outputs.selected_words.send(topic)
Пример #38
0
    def test_get_column(self):
        """``get_column`` honours valid-row filtering and label retrieval."""
        widget = self.widget
        get_column = widget.get_column

        cont = ContinuousVariable("cont")
        disc = DiscreteVariable("disc", list("abcdefghijklmno"))
        disc2 = DiscreteVariable("disc2", list("abc"))
        disc3 = DiscreteVariable("disc3", list("abc"))
        string = StringVariable("string")
        domain = Domain([cont, disc], disc2, [disc3, string])

        widget.data = Table.from_numpy(
            domain,
            np.array([[1, 4], [2, 15], [6, 7]], dtype=float),
            np.array([2, 1, 0], dtype=float),
            np.array([[0, "foo"], [2, "bar"], [1, "baz"]])
        )

        # No valid-row mask yet: whole columns are returned.
        self.assertIsNone(get_column(None))
        full_columns = (
            (cont, [1, 2, 6]),
            (disc, [4, 15, 7]),
            (disc2, [2, 1, 0]),
            (disc3, [0, 2, 1]),
        )
        for var, expected in full_columns:
            np.testing.assert_almost_equal(get_column(var), expected)
        self.assertEqual(list(get_column(string)), ["foo", "bar", "baz"])

        widget.valid_data = np.array([True, False, True])

        # By default only valid rows are returned.
        self.assertIsNone(get_column(None))
        np.testing.assert_almost_equal(get_column(cont), [1, 6])
        self.assertEqual(list(get_column(string)), ["foo", "baz"])

        # The second positional argument set to False returns all rows.
        self.assertIsNone(get_column(None, False))
        np.testing.assert_almost_equal(get_column(cont, False), [1, 2, 6])
        self.assertEqual(list(get_column(string, False)), ["foo", "bar", "baz"])

        # Labels are available for discrete variables only.
        self.assertIsNone(get_column(None, return_labels=True))
        for var in (disc, disc2, disc3):
            self.assertEqual(get_column(var, return_labels=True), var.values)
        for var in (cont, string):
            for extra in ({}, {"max_categories": 4}):
                with self.assertRaises(AssertionError):
                    get_column(var, return_labels=True, **extra)
Пример #39
0
    def handle_languages(self):
        """Add or remove the language meta column of the corpus and send it.

        With ``self.detect_languages`` set, languages are detected if the
        corpus has none yet, and a new string meta (named "Language", made
        unique among the existing variable names) holding them is appended.
        Otherwise the first meta flagged with the 'language-feature'
        attribute, if any, is removed together with its column.

        In both cases ``self.corpus`` is replaced by a new ``Corpus`` built
        from copies of X, Y and W, and the corpus (possibly None) is sent to
        the output.
        """
        if self.corpus is not None:
            domain = self.corpus.domain
            if self.detect_languages:
                if self.corpus.languages is None:
                    self.corpus.detect_languages()

                # Collect every variable name so the new meta does not clash.
                curr_attributes = list(domain.attributes)
                curr_class_var = [domain.class_var] if domain.class_var else []
                curr_metas = list(domain.metas)
                curr_variables = curr_attributes + curr_class_var + curr_metas
                curr_names = [var.name for var in curr_variables]
                new_name = get_unique_names(curr_names, "Language")

                # The attribute marks the meta so that it can be found and
                # removed again in the branch below.
                variable_attrs = {'language-feature': True}
                new_variable = StringVariable(new_name)
                new_variable.attributes.update(variable_attrs)
                new_domain = Domain(attributes=domain.attributes,
                                    class_vars=domain.class_var,
                                    metas=list(domain.metas) + [new_variable])
                # Append the languages as the last meta column.
                metas = np.hstack([
                    self.corpus.metas,
                    np.array(self.corpus.languages).reshape(-1, 1)
                ])
                self.corpus = Corpus(new_domain, self.corpus.X.copy(),
                                     self.corpus.Y.copy(), metas,
                                     self.corpus.W.copy(),
                                     copy(self.corpus.text_features))
            else:
                # Find the first meta that was flagged as the language feature.
                lang_feat_idx = None
                for i, f in enumerate(domain.metas):
                    if ('language-feature' in f.attributes
                            and f.attributes['language-feature']):
                        lang_feat_idx = i
                        break
                if lang_feat_idx is not None:
                    # Drop the variable from the domain and its column from
                    # the metas array.
                    new_domain = Domain(attributes=domain.attributes,
                                        class_vars=domain.class_var,
                                        metas=list(
                                            np.delete(list(domain.metas),
                                                      lang_feat_idx)))
                    self.corpus = Corpus(
                        new_domain, self.corpus.X.copy(), self.corpus.Y.copy(),
                        np.delete(self.corpus.metas, lang_feat_idx, axis=1),
                        self.corpus.W.copy(), copy(self.corpus.text_features))
        self.Outputs.corpus.send(self.corpus)
Пример #40
0
    def test_value_from_string_substring(self):
        """The transformer forwards its data and default flags, and maps None."""
        trans = ValueFromStringSubstring(StringVariable(), self.patterns)
        with_none = np.hstack((self.arr.astype(object), [None]))

        with patch("Orange.widgets.data.owcreateclass.map_by_substring") as mbs:
            trans.transform(self.arr)
            passed, patterns, case_sensitive, match_beginning = mbs.call_args[0]
            np.testing.assert_equal(passed, self.arr)
            self.assertEqual(patterns, self.patterns)
            self.assertFalse(case_sensitive)
            self.assertFalse(match_beginning)

            # A None entry reaches the mapping function as an empty string.
            trans.transform(with_none)
            passed, patterns, *_ = mbs.call_args[0]
            expected_passed = np.hstack((self.arr.astype(str), ""))
            np.testing.assert_equal(passed, expected_passed)

        # Unpatched: the None entry maps to NaN.
        np.testing.assert_equal(trans.transform(with_none),
                                [0, 1, 2, 0, 3, np.nan])
Пример #41
0
def create_data_from_states(example_states, example_traces):
    """Build a table with one row per state.

    The domain takes its attributes from the first state's domain, uses a
    continuous "complexity" class variable, and has two metas: a string "id"
    and a continuous "trace".

    Args:
        example_states: sequence of states; each must provide ``domain``,
            ``get_attribute(attr)`` and ``get_id()``.
        example_traces: container indexed by the position of the state,
            giving the value stored in the "trace" meta.

    Returns:
        The table filled with one instance per state.
    """
    state_attrs = example_states[0].domain.get_attributes()
    class_var = ContinuousVariable.make("complexity")
    meta_vars = [StringVariable.make("id"), ContinuousVariable("trace")]
    domain = Domain(state_attrs, class_var, metas=meta_vars)

    table = Table.from_domain(domain)
    for index, state in enumerate(example_states):
        row = Instance(domain)
        for attr in state_attrs:
            row[attr] = state.get_attribute(attr)
        row["id"] = state.get_id()
        row["trace"] = example_traces[index]
        table.append(row)
    return table
 def setUp(self):
     """Create the graph under test plus mock results and two small tables."""
     self.parent = DummyWidget()
     self.graph = DropoutGraph(self.parent)

     # Mock keyword arguments are set as attributes on the mock.
     self.results = Mock(
         decay=1,
         x_offset=0.1,
         y_offset=0.1,
         mean_expr=np.array([0.1, 0.2]),
         zero_rate=np.array([0.1, 0.2]),
         threshold=0,
     )

     # Three rows over two continuous attributes.
     expression_domain = Domain(
         [ContinuousVariable("A"), ContinuousVariable("B")])
     self.data = Table(expression_domain,
                       np.array([[1, 0], [0, 0], [2, 0]]))

     # One row with no attributes and a single string meta.
     gene_domain = Domain([], metas=[StringVariable("Entrez ID")])
     self.genes = Table(gene_domain,
                        np.empty((1, 0)),
                        metas=np.array([["1"]]))
Пример #43
0
    def test_from_documents(self):
        """Corpus.from_documents should mirror the documents and the domain spec.

        Checks the corpus length and name, the number of attributes, class
        variables and metas, and that the first attribute carries the
        'engine' values of the documents in document order.
        """
        documents = [
            {'wheels': 4, 'engine': 'w4', 'type': 'car',
             'desc': 'A new car.'},
            {'wheels': 8., 'engine': 'w8', 'type': 'truck',
             'desc': 'An old truck.'},
            {'wheels': 12., 'engine': 'w12', 'type': 'truck',
             'desc': 'An new truck.'},
        ]

        def field(key):
            # Extractor that reads one key from a document dict.
            return lambda doc: doc.get(key)

        attrs = [
            (DiscreteVariable('Engine'), field('engine')),
            (ContinuousVariable('Wheels'), field('wheels')),
        ]
        class_vars = [(DiscreteVariable('Type'), field('type'))]
        metas = [(StringVariable('Description'), field('desc'))]

        dataset_name = 'TruckData'
        corpus = Corpus.from_documents(
            documents, dataset_name, attrs, class_vars, metas)

        self.assertEqual(len(corpus), len(documents))
        self.assertEqual(corpus.name, dataset_name)
        self.assertEqual(len(corpus.domain.attributes), len(attrs))
        self.assertEqual(len(corpus.domain.class_vars), len(class_vars))
        self.assertEqual(len(corpus.domain.metas), len(metas))

        engines = [doc['engine'] for doc in documents]
        engine_var = corpus.domain.attributes[0]
        self.assertEqual(sorted(engine_var.values), sorted(engines))
        self.assertEqual(
            [engine_var.repr_val(value) for value in corpus.X[:, 0]],
            engines)
    def test_varying_between_combined(self):
        """varying_between should give the same answer for dense and sparse X.

        The first meta column ("A"/"B") is passed as the id variable; the
        expected result mixes attributes and metas.
        """
        X = np.array([
            [0, 0, 0, 0, 0, 1],
            [0, 0, 1, 1, 0, 1],
            [0, 0, 0, 2, np.nan, np.nan],
            [0, 1, 0, 0, 0, 0],
            [0, 1, 0, 2, 0, 0],
            [0, 1, 0, 0, np.nan, 0],
        ])

        M = np.array(
            [
                ["A", 0, 0, 0, 0, 0, 1],
                ["A", 0, 0, 1, 1, 0, 1],
                ["A", 0, 0, 0, 2, np.nan, np.nan],
                ["B", 0, 1, 0, 0, 0, 0],
                ["B", 0, 1, 0, 2, 0, 0],
                ["B", 0, 1, 0, 0, np.nan, 0],
            ],
            dtype=str,
        )

        variables = [ContinuousVariable(name="F{}".format(col))
                     for col in range(X.shape[1])]
        metas = [StringVariable(name="M{}".format(col))
                 for col in range(M.shape[1])]
        domain = Domain(attributes=variables, metas=metas)

        expected = [variables[2], variables[3],
                    metas[3], metas[4], metas[5], metas[6]]

        # First pass uses X as is, second pass wraps it in a CSR matrix.
        for as_matrix in (lambda dense: dense, sp.csr_matrix):
            data = Table.from_numpy(X=as_matrix(X), domain=domain, metas=M)
            self.assertEqual(
                varying_between(data, idvar=data.domain.metas[0]),
                expected,
            )
Пример #45
0
    def test_domaineditor_makes_variables(self):
        """Variables set through the domain editor must equal those made
        directly with ``make`` (i.e. be interchangeable with variables read
        from file)."""
        contents = """V0\tV1\nc\td\n\n1.0\t2"""
        expected = {
            "V0": StringVariable.make("V0"),
            "V1": ContinuousVariable.make("V1"),
        }

        with named_file(contents, suffix=".tab") as filename:
            self.open_dataset(filename)

            # Column 1 of the editor model receives the type name:
            # row 0 -> "text", row 1 -> "numeric".
            editor_model = self.widget.domain_editor.model()
            for row, type_name in enumerate(("text", "numeric")):
                editor_model.setData(
                    editor_model.createIndex(row, 1), type_name, Qt.EditRole)
            self.widget.apply_button.click()

            output = self.get_output(self.widget.Outputs.data)
            for var_name, variable in expected.items():
                self.assertEqual(output.domain[var_name], variable)
Пример #46
0
def transpose_table(table):
    """
    Transpose the rows and columns of the table.

    Every row of ``table`` becomes a continuous attribute named after the
    row's 'Gene' value. Every attribute of ``table`` becomes a row whose
    meta values are read from that attribute's ``attributes`` dictionary.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns:
         Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    attrs = table.domain.attributes
    attr = [ContinuousVariable.make(ex['Gene'].value) for ex in table]

    # Meta names are the annotation keys of the first variable; 'Time' becomes
    # a TimeVariable, every other key a StringVariable. The comparison must
    # use ==: `is` tests object identity, which holds for equal strings only
    # when they happen to be interned.
    annotation_names = sorted(table.domain.variables[0].attributes.keys())
    new_metas = [TimeVariable.make(name) if name == 'Time'
                 else StringVariable.make(name)
                 for name in annotation_names]
    domain = Domain(attr, metas=new_metas)

    # One meta row per original attribute, in the order of the new metas.
    meta_values = [[exp.attributes[var.name] for var in domain.metas]
                   for exp in attrs]

    return Table(domain, table.X.transpose(), metas=meta_values)
Пример #47
0
def transpose_labels_to_class(data, class_label=None, gene_label="gene"):
    """Converts data with genes in rows to data with genes as attributes.

    Each attribute of ``data`` becomes one row of the result; the value of
    its ``class_label`` annotation becomes the class. The original attribute
    name is stored in a "sample" string meta.
    """
    source_attrs = data.domain.attributes

    # if no class_label (attribute type) given, guess it from the data
    if not class_label:
        label_keys = []
        for attribute in source_attrs:
            label_keys += list(attribute.attributes.keys())
        label_keys = list(set(label_keys))
        class_label = label_keys[0]
        if len(label_keys) > 1:
            import warnings
            warnings.warn("More than single attribute label types (%s), took %s"
                          % (", ".join(label_keys), class_label))

    # New attributes are named after the gene meta when it exists,
    # otherwise they get generated names.
    meta_names = [meta.name for meta in data.domain.getmetas().values()]
    if gene_label in meta_names:
        atts = [ContinuousVariable(str(row[gene_label])) for row in data]
    else:
        atts = [ContinuousVariable("A%d" % index)
                for index in range(len(data))]

    label_values = [attribute.attributes[class_label]
                    for attribute in source_attrs]
    classvalues = list(set(label_values))

    # All-numeric labels give a continuous class, anything else a discrete one.
    if all(isinstance(value, (int, float, complex)) for value in classvalues):
        classvar = ContinuousVariable(class_label)
    else:
        classvar = DiscreteVariable(class_label, values=classvalues)

    domain = Orange.data.Domain(atts, classvar)

    rows = [
        [_float_or_na(row[attribute]) for row in data]
        + [attribute.attributes[class_label]]
        for attribute in source_attrs
    ]

    sample = StringVariable("sample")
    meta_id = StringVariable.new_meta_id()
    new = Orange.data.Table(domain, rows)
    new.domain.addmeta(meta_id, sample)
    for instance, attribute in zip(new, source_attrs):
        instance[sample] = attribute.name

    return new
Пример #48
0
def transpose_class_to_labels(data, attcol="sample"):
    """Converts data with genes as attributes to data with genes in rows.

    Each row of ``data`` becomes an attribute annotated with the row's class
    (as a string under the "class" key); each original attribute becomes a
    row whose name is stored in a "gene" string meta.
    """
    meta_names = [meta.name for meta in data.domain.getmetas().values()]
    if attcol in meta_names:
        atts = [ContinuousVariable(str(row[attcol])) for row in data]
    else:
        atts = [ContinuousVariable("S%d" % index)
                for index in range(len(data))]

    # Attach the class of each source row to the attribute made from it.
    for att, row in zip(atts, data):
        att.setattr("class", str(row.getclass()))
    domain = Orange.data.Domain(atts, None)

    source_attrs = data.domain.attributes
    rows = [[_float_or_na(row[attribute]) for row in data]
            for attribute in source_attrs]

    gene = StringVariable("gene")
    meta_id = StringVariable.new_meta_id()
    new = Orange.data.Table(domain, rows)
    new.domain.addmeta(meta_id, gene)
    for instance, attribute in zip(new, source_attrs):
        instance[gene] = attribute.name

    return new
Пример #49
0
    def _guess_variable(self, field_name, field_metadata, inspect_table):
        """Choose a variable type for a column from its type code.

        Args:
            field_name: name given to the created variable.
            field_metadata: sequence whose first item is the type code.
            inspect_table: when truthy, passed to ``get_distinct_values`` so
                that integer and character columns can become discrete.

        Returns:
            A continuous, time, discrete or string variable; string is the
            fallback for every unhandled case.
        """
        FLOATISH_TYPES = (700, 701, 1700)  # real, float8, numeric
        INT_TYPES = (20, 21, 23)  # bigint, int, smallint
        CHAR_TYPES = (25, 1042, 1043,)  # text, char, varchar
        BOOLEAN_TYPES = (16,)  # bool
        DATE_TYPES = (1082, 1114, 1184, )  # date, timestamp, timestamptz
        # time, timestamp, timestamptz, timetz
        TIME_TYPES = (1083, 1114, 1184, 1266,)

        type_code = field_metadata[0]

        if type_code in FLOATISH_TYPES:
            return ContinuousVariable.make(field_name)

        has_date = type_code in DATE_TYPES
        has_time = type_code in TIME_TYPES
        if has_time or has_date:
            time_var = TimeVariable.make(field_name)
            # |= keeps flags that are already set on the variable.
            time_var.have_date |= has_date
            time_var.have_time |= has_time
            return time_var

        if type_code in INT_TYPES:
            # Integers are discrete only when distinct values are returned.
            distinct = (self.get_distinct_values(field_name, inspect_table)
                        if inspect_table else None)
            if distinct:
                return DiscreteVariable.make(field_name, distinct)
            return ContinuousVariable.make(field_name)

        if type_code in BOOLEAN_TYPES:
            return DiscreteVariable.make(field_name, ['false', 'true'])

        if type_code in CHAR_TYPES and inspect_table:
            # remove trailing spaces
            distinct = [value.rstrip() for value in
                        self.get_distinct_values(field_name, inspect_table)]
            if distinct:
                return DiscreteVariable.make(field_name, distinct)

        return StringVariable.make(field_name)
Пример #50
0
    def etc_to_table(self, etc_json, time_var=False, callback=lambda: None):
        """ Converts data from Json to :obj:`Orange.data.table`

        Args:
            etc_json (dict): Data in json like format; ``etc_json['etc']``
                must hold 'timePoints' and 'genes'.
            time_var (bool): Create column of time points (the table is
                transposed). Default is set to False.
            callback: callable handed to the ``CallBack`` progress helper.
        Returns:
            :obj:`Orange.data.Table`
        """
        cbc = CallBack(2, callback, callbacks=30)
        etc = etc_json['etc']

        # One continuous variable per time point, numbered from 1 and
        # annotated with the time as a string.
        variables = []
        for position, time in enumerate(etc['timePoints'], start=1):
            variable = ContinuousVariable('TP {}'.format(position))
            variable.attributes['Time'] = str(time)
            variables.append(variable)

        domain = Domain(variables, metas=[StringVariable.make('Gene')])
        cbc()

        # Each row holds the expression values followed by the gene key,
        # which fills the 'Gene' meta.
        genes = etc['genes']
        rows = []
        for gene in genes:
            row = list(genes[gene])
            row.append(gene)
            rows.append(row)

        orange_table = Table(domain, rows)

        if time_var:
            orange_table = transpose_table(orange_table)
            cbc()

        cbc.end()
        return orange_table
Пример #51
0
def _corpus_from_records(records, includes_metadata):
    """Receives PubMed records and transforms them into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): Two-item entries describing the fields to
            include; only the first item (the field name) is used here.

    Returns:
        corpus: The output Corpus, with the fields as metas and a discrete
            'section' class variable.
    """
    meta_vars = []
    time_var = None
    for field_name, _ in includes_metadata:
        if field_name == PUBMED_FIELD_DATE:
            # The date field is the only one stored as a time variable.
            time_var = TimeVariable(field_name)
            meta_vars.append(time_var)
            continue
        string_var = StringVariable.make(field_name)
        if field_name == PUBMED_FIELD_TITLE:
            string_var.attributes["title"] = True
        meta_vars.append(string_var)

    meta_values, class_values = _records_to_corpus_entries(
        records,
        includes_metadata=includes_metadata,
        time_var=time_var,
    )

    # Distinct truthy class values, as strings, become the class categories.
    sections = [str(value) for value in set(filter(None, class_values))]
    section_var = DiscreteVariable('section', values=sections)
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    encoded = [section_var.to_val(value) for value in class_values]
    Y = np.array(encoded)[:, None]

    return Corpus(domain=domain, Y=Y, metas=meta_values)
Пример #52
0
    def read(self):
        """Load the selected data block of an OPUS file into an Orange table.

        The block is ``self.sheet`` when set, otherwise ``self.sheets[0]``;
        its name is split on spaces and passed to ``opusFC.getOpusData``.
        Depending on the type of the returned object, spectra or traces
        become the table's X, and map coordinates, region titles, start
        times or z values become metas. The 'SRT' and 'SNM' parameters, when
        present, are appended as constant meta columns.

        Returns:
            Orange.data.Table with float X and the collected metas.

        Raises:
            RuntimeError: if ``opusFC`` cannot be imported.
            IOError: if ``opusFC.getOpusData`` fails.
            ValueError: for an unsupported return type or parameter type.
        """
        try:
            import opusFC
        except ImportError as err:
            raise RuntimeError(self._OPUS_WARNING) from err

        if self.sheet:
            db = self.sheet
        else:
            db = self.sheets[0]

        db = tuple(db.split(" "))
        # Not used below; kept because the lookup fails fast when the block
        # name has no second space-separated component.
        dim = db[1]

        try:
            data = opusFC.getOpusData(self.filename, db)
        except Exception as err:
            raise IOError("Couldn't load spectrum from " + self.filename) from err

        attrs, clses, metas = [], [], []

        # By default one attribute per x value; the TRC branches below
        # replace these with attributes built from data.labels.
        attrs = [ContinuousVariable.make(repr(data.x[i]))
                 for i in range(data.x.shape[0])]

        y_data = None
        meta_data = None

        if type(data) == opusFC.MultiRegionDataReturn:
            y_data = []
            meta_data = []
            metas.extend([ContinuousVariable.make('map_x'),
                          ContinuousVariable.make('map_y'),
                          StringVariable.make('map_region'),
                          TimeVariable.make('start_time')])
            # Stack spectra and per-point metas of all regions.
            for region in data.regions:
                y_data.append(region.spectra)
                mapX = region.mapX
                mapY = region.mapY
                map_region = np.full_like(mapX, region.title, dtype=object)
                start_time = region.start_time
                meta_region = np.column_stack((mapX, mapY,
                                               map_region, start_time))
                meta_data.append(meta_region.astype(object))
            y_data = np.vstack(y_data)
            meta_data = np.vstack(meta_data)

        elif type(data) == opusFC.MultiRegionTRCDataReturn:
            y_data = []
            meta_data = []
            metas.extend([ContinuousVariable.make('map_x'),
                          ContinuousVariable.make('map_y'),
                          StringVariable.make('map_region')])
            attrs = [ContinuousVariable.make(repr(data.labels[i]))
                     for i in range(len(data.labels))]
            for region in data.regions:
                y_data.append(region.spectra)
                mapX = region.mapX
                mapY = region.mapY
                map_region = np.full_like(mapX, region.title, dtype=object)
                meta_region = np.column_stack((mapX, mapY, map_region))
                meta_data.append(meta_region.astype(object))
            y_data = np.vstack(y_data)
            meta_data = np.vstack(meta_data)

        elif type(data) == opusFC.ImageDataReturn:
            metas.extend([ContinuousVariable.make('map_x'),
                          ContinuousVariable.make('map_y')])

            data_3D = data.spectra

            # Flatten the image slice by slice along its first axis, pairing
            # every mapX value with the mapY value of the slice.
            for i in np.ndindex(data_3D.shape[:1]):
                map_y = np.full_like(data.mapX, data.mapY[i])
                coord = np.column_stack((data.mapX, map_y))
                if y_data is None:
                    y_data = data_3D[i]
                    meta_data = coord.astype(object)
                else:
                    y_data = np.vstack((y_data, data_3D[i]))
                    meta_data = np.vstack((meta_data, coord))

        elif type(data) == opusFC.ImageTRCDataReturn:
            metas.extend([ContinuousVariable.make('map_x'),
                          ContinuousVariable.make('map_y')])

            attrs = [ContinuousVariable.make(repr(data.labels[i]))
                     for i in range(len(data.labels))]
            data_3D = data.traces

            for i in np.ndindex(data_3D.shape[:1]):
                map_y = np.full_like(data.mapX, data.mapY[i])
                coord = np.column_stack((data.mapX, map_y))
                if y_data is None:
                    y_data = data_3D[i]
                    meta_data = coord.astype(object)
                else:
                    y_data = np.vstack((y_data, data_3D[i]))
                    meta_data = np.vstack((meta_data, coord))

        elif type(data) == opusFC.TimeResolvedTRCDataReturn:
            y_data = data.traces

        elif type(data) == opusFC.TimeResolvedDataReturn:
            metas.extend([ContinuousVariable.make('z')])

            y_data = data.spectra
            meta_data = data.z

        elif type(data) == opusFC.SingleDataReturn:
            y_data = data.y[None, :]

        else:
            # str() is required: concatenating a type object to a str would
            # raise TypeError and hide the intended ValueError.
            raise ValueError("Empty or unsupported opusFC DataReturn object: "
                             + str(type(data)))

        import_params = ['SRT', 'SNM']

        for param_key in import_params:
            try:
                param = data.parameters[param_key]
            except KeyError:
                pass  # TODO should notify user?
            else:
                try:
                    param_name = opusFC.paramDict[param_key]
                except KeyError:
                    param_name = param_key
                if param_key == 'SRT':
                    var = TimeVariable.make(param_name)
                elif type(param) is float:
                    var = ContinuousVariable.make(param_name)
                elif type(param) is str:
                    var = StringVariable.make(param_name)
                else:
                    # Found a type to handle
                    raise ValueError(
                        "Unsupported type %s for OPUS parameter %s"
                        % (type(param).__name__, param_key))
                metas.extend([var])
                # Repeat the parameter once per table row.
                params = np.full((y_data.shape[0],), param, np.array(param).dtype)
                if meta_data is not None:
                    # NB dtype default will be np.array(fill_value).dtype in future
                    meta_data = np.column_stack((meta_data, params.astype(object)))
                else:
                    meta_data = params

        domain = Orange.data.Domain(attrs, clses, metas)

        meta_data = np.atleast_2d(meta_data)

        table = Orange.data.Table.from_numpy(domain,
                                             y_data.astype(float, order='C'),
                                             metas=meta_data)

        return table
Пример #53
0
    def read(self):
        """Read a matlab file into an Orange table.

        Only ndarray entries of the file are considered. The numeric array
        with the most elements becomes X. An array shaped ``(n_columns,)``
        or ``(1, n_columns)`` supplies the attribute names; otherwise the
        column indices are used. Every remaining array whose length equals
        the number of rows is resized in place to a single column
        (``ndarray.resize``) and stored as a string meta.

        Returns:
            Orange.data.Table built from X and the meta columns; metas are
            ``None`` when no array qualifies.

        Raises:
            IOError: if the file lists no variables or contains no usable
                arrays.
        """
        who = matlab.whosmat(self.filename)
        if not who:
            raise IOError("Couldn't load matlab file " + self.filename)

        ml = matlab.loadmat(self.filename, chars_as_strings=True)
        ml = {a: b for a, b in ml.items() if isinstance(b, np.ndarray)}

        # X is the biggest numeric array
        numarrays = []
        for name, con in ml.items():
            if issubclass(con.dtype.type, numbers.Number):
                numarrays.append((name, reduce(lambda x, y: x*y, con.shape, 1)))
        X = None
        if numarrays:
            nameX = max(numarrays, key=lambda x: x[1])[0]
            X = ml.pop(nameX)

        # find an array with compatible shapes
        attributes = []
        if X is not None:
            nameattributes = None
            for name, con in ml.items():
                if con.shape in [(X.shape[1],), (1, X.shape[1])]:
                    nameattributes = name
                    break
            attributenames = ml.pop(nameattributes).ravel() if nameattributes else range(X.shape[1])
            attributenames = [str(a).strip() for a in attributenames]  # strip because of numpy char array
            attributes = [ContinuousVariable.make(a) for a in attributenames]

        metas = []
        metaattributes = []

        sizemetas = None
        if X is None:
            # Without X, the row count is the most common array length.
            counts = defaultdict(list)
            for name, con in ml.items():
                counts[len(con)].append(name)
            if counts:
                sizemetas = max(counts.keys(), key=lambda x: len(counts[x]))
        else:
            sizemetas = len(X)
        if sizemetas:
            for name, con in ml.items():
                if len(con) == sizemetas:
                    metas.append(name)

        metadata = []
        for m in sorted(metas):
            f = ml[m]
            metaattributes.append(StringVariable.make(m))
            f.resize(sizemetas, 1)
            metadata.append(f)

        # np.hstack raises ValueError on an empty sequence, so a file
        # without meta candidates must yield no metas instead.
        metadata = np.hstack(tuple(metadata)) if metadata else None

        domain = Domain(attributes, metas=metaattributes)
        if X is None:
            if sizemetas is None:
                # No arrays at all: there is no row count to build X from.
                raise IOError("No usable arrays in matlab file " + self.filename)
            X = np.zeros((sizemetas, 0))
        return Orange.data.Table.from_numpy(domain, X, Y=None, metas=metadata)
Пример #54
0
    def read(self):
        """Read a matlab file into an Orange table.

        Only ndarray entries of the file are considered. The numeric array
        with the most elements becomes X. The first array (by sorted name)
        shaped ``(n_columns,)`` or ``(1, n_columns)`` provides attribute
        names; otherwise the column indices are used. Remaining arrays whose
        length equals the number of rows become metas: 1D string arrays as
        string variables, 2D numeric arrays as one continuous variable per
        column. Other arrays are ignored.

        Raises:
            IOError: if the file lists no variables.
        """
        if not matlab.whosmat(self.filename):
            raise IOError("Couldn't load matlab file " + self.filename)

        loaded = matlab.loadmat(self.filename, chars_as_strings=True)
        ml = {key: val for key, val in loaded.items()
              if isinstance(val, np.ndarray)}

        def holds_strings(array):
            return issubclass(array.dtype.type, np.str_)

        def holds_numbers(array):
            return issubclass(array.dtype.type, numbers.Number)

        # X is the biggest numeric array; (size, name) pairs are compared,
        # so equal sizes are resolved by the name.
        numeric_sizes = [(arr.size, key) for key, arr in ml.items()
                         if holds_numbers(arr)]
        X = ml.pop(max(numeric_sizes)[1]) if numeric_sizes else None

        # find an array with compatible shapes
        attributes = []
        if X is not None:
            n_columns = X.shape[1]
            label_shapes = [(n_columns,), (1, n_columns)]
            label_key = next(
                (key for key in sorted(ml) if ml[key].shape in label_shapes),
                None)
            labels = ml.pop(label_key).ravel() if label_key else range(n_columns)
            # rstrip removes matlab char padding
            attributes = [ContinuousVariable.make(str(label).rstrip())
                          for label in labels]

        # Number of rows: taken from X, or the most common array length.
        meta_size = None
        if X is not None:
            meta_size = len(X)
        else:
            by_length = defaultdict(list)
            for key, arr in ml.items():
                by_length[len(arr)].append(key)
            if by_length:
                meta_size = max(by_length,
                                key=lambda size: len(by_length[size]))

        meta_names = []
        if meta_size:
            meta_names = [key for key, arr in ml.items()
                          if len(arr) == meta_size]

        metas = []
        meta_data = []
        for key in sorted(meta_names):
            column = ml[key]
            if holds_strings(column) and column.ndim == 1:  # 1D string arrays
                metas.append(StringVariable.make(key))
                # remove matlab char padding
                stripped = np.array([text.rstrip() for text in column])
                meta_data.append(stripped.reshape(meta_size, 1))
            elif holds_numbers(column) and column.ndim == 2:
                width = column.shape[1]
                if width == 1:
                    column_names = [key]
                else:
                    column_names = [key + "_" + str(i + 1)
                                    for i in range(width)]
                metas.extend(ContinuousVariable.make(column_name)
                             for column_name in column_names)
                meta_data.append(column)

        meta_data = np.hstack(tuple(meta_data)) if meta_data else None

        domain = Domain(attributes, metas=metas)
        if X is None:
            X = np.zeros((meta_size, 0))
        return Orange.data.Table.from_numpy(domain, X, Y=None, metas=meta_data)