Пример #1
0
    def send_selection(self):
        """
        Send the rows covered by the selected areas to the outputs.

        When nothing is selected or there is no data, `None` is sent as the
        selected data together with an annotated table built with an empty
        index list. Otherwise the per-area filters are OR-ed, applied to
        `self.discrete_data`, and the matching rows are located in
        `self.data` through their ids.
        """
        outputs = self.Outputs
        if not self.selection or self.data is None:
            outputs.selected_data.send(None)
            outputs.annotated_data.send(create_annotated_table(self.data, []))
            return

        # Clear first; the warning is shown again below only if it applies.
        self.Warning.no_cont_selection_sql.clear()
        uses_other_table = self.discrete_data is not self.data
        if uses_other_table and isinstance(self.data, SqlTable):
            self.Warning.no_cont_selection_sql()

        area_filters = []
        for area_index in self.selection:
            columns, values, _ = self.areas[area_index]
            # One FilterDiscrete per (column, value) pair of the area.
            conditions = (filter.FilterDiscrete(column, [value])
                          for column, value in zip(columns, values))
            area_filters.append(filter.Values(conditions))

        if len(area_filters) > 1:
            combined = filter.Values(area_filters, conjunction=False)
        else:
            combined = area_filters[0]

        selection = combined(self.discrete_data)
        selected_ids = set(selection.ids)
        sel_idx = [row for row, row_id in enumerate(self.data.ids)
                   if row_id in selected_ids]
        if uses_other_table:
            # Output rows of the table the widget received, not of the
            # table that was filtered.
            selection = self.data[sel_idx]

        outputs.selected_data.send(selection)
        outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))
Пример #2
0
    def update_selection(self):
        """
        Update the pen width of every area and output the selected rows.

        Areas whose index is in `self.selection` get a pen of width 4, all
        others a pen of width 1. "Selection" receives `None` when there are
        no areas or nothing is selected; in that case no pen is touched.
        """
        if self.areas is None or not self.selection:
            self.send("Selection", None)
            return

        area_filters = []
        for index, area in enumerate(self.areas):
            is_selected = index in self.selection
            if is_selected:
                val_x, val_y = area.value_pair
                conditions = [
                    filter.FilterDiscrete(self.attrX, [val_x]),
                    filter.FilterDiscrete(self.attrY, [val_y]),
                ]
                area_filters.append(filter.Values(conditions))
            pen = area.pen()
            pen.setWidth(4 if is_selected else 1)
            area.setPen(pen)

        # A single area filter is used as is; several are OR-ed together.
        if len(area_filters) == 1:
            combined = area_filters[0]
        else:
            combined = filter.Values(area_filters, conjunction=False)
        self.send("Selection", combined(self.data))
Пример #3
0
 def send_selection(self):
     """
     Send the rows covered by the selected areas to "Selected Data".

     `None` is sent when nothing is selected or there is no data.
     Otherwise the per-area filters are OR-ed and applied to
     `self.discrete_data`. If that table is not `self.data` itself, the
     matching rows are looked up in `self.data` by id, so the output
     contains rows of the original table.
     """
     if not self.selection or self.data is None:
         self.send("Selected Data", None)
         return

     # Called without a message first; presumably this resets warning 6
     # before it is (possibly) raised again -- confirm in the base class.
     self.warning(6)
     if self.discrete_data is not self.data \
             and isinstance(self.data, SqlTable):
         self.warning(
             6,
             "Selection of continuous variables on SQL is not supported"
         )

     filters = []
     for i in self.selection:
         cols, vals, _ = self.areas[i]
         filters.append(
             filter.Values(
                 filter.FilterDiscrete(col, [val])
                 for col, val in zip(cols, vals)))
     if len(filters) > 1:
         filters = filter.Values(filters, conjunction=False)
     else:
         filters = filters[0]

     selection = filters(self.discrete_data)
     if self.discrete_data is not self.data:
         idset = set(selection.ids)
         sel_idx = [row for row, row_id in enumerate(self.data.ids)
                    if row_id in idset]
         # Index the original table: the indices were computed against
         # self.data.ids, and indexing self.discrete_data here would make
         # the id mapping pointless.
         selection = self.data[sel_idx]
     self.send("Selected Data", selection)
Пример #4
0
    def update_selection(self):
        """
        Redraw the selection outline and emit the selected rows.

        Areas whose index is in `self.selection` get a pen of width 4, all
        others a pen of width 1. The selected rows are found by filtering
        `self.discrete_data`; when that is a different table than
        `self.data`, the rows of `self.data` with the same ids are sent
        instead. "Selection" receives `None` when there are no areas or
        nothing is selected.
        """
        if self.areas is None or not self.selection:
            self.send("Selection", None)
            return

        area_filters = []
        for index, area in enumerate(self.areas):
            selected = index in self.selection
            if selected:
                val_x, val_y = area.value_pair
                conditions = [
                    filter.FilterDiscrete(self.attrX, [val_x]),
                    filter.FilterDiscrete(self.attrY, [val_y]),
                ]
                area_filters.append(filter.Values(conditions))
            pen = area.pen()
            pen.setWidth(4 if selected else 1)
            area.setPen(pen)

        # A single area filter is used as is; several are OR-ed together.
        if len(area_filters) == 1:
            combined = area_filters[0]
        else:
            combined = filter.Values(area_filters, conjunction=False)

        selection = combined(self.discrete_data)
        if self.discrete_data is not self.data:
            wanted_ids = set(selection.ids)
            row_indices = [row for row, row_id in enumerate(self.data.ids)
                           if row_id in wanted_ids]
            selection = self.data[row_indices]
        self.send("Selection", selection)
Пример #5
0
    def test_discrete_value_filter_with_None(self):
        """FilterDiscrete(3, None) keeps rows whose column 3 is not None."""
        condition = filter.FilterDiscrete(3, None)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [row for row in self.data if row[3] is not None]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #6
0
    def test_discrete_value_filter_with_multiple_values(self):
        """FilterDiscrete(3, ["a", "b"]) keeps rows with "a" or "b"."""
        condition = filter.FilterDiscrete(3, ["a", "b"])
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [row for row in self.data if row[3] in ("a", "b")]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #7
0
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Load the table stored as ``'<gds_id>.tab'`` and annotate it.

    Parameters
    ----------
    gds_id : str
        Identifier used to build the file name ``'<gds_id>.tab'``.
    samples : dict, optional
        Maps a column name to the list of values to keep in that column.
        When given, the table is filtered accordingly and a discrete class
        variable named ``'class'`` is appended; its values are the
        ``'|'``-joined values of the listed columns.
    transpose : bool
        When true, the table is transposed and the annotations are set for
        the transposed layout.
    callback : callable, optional
        Passed to ``local_files.update`` and additionally called without
        arguments after loading and once more before returning.

    Returns
    -------
    The resulting table, with ``attributes`` set to the annotation dict.
    """
    file_name = '{}.tab'.format(gds_id)
    local_files.update(file_name, extract=True, callback=callback)

    table = Table(local_files.localpath_download(file_name))
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        filters = [
            table_filter.FilterStringList(sample, sample_types)
            for sample, sample_types in samples.items()
        ]
        table = table_filter.Values(filters)(table)

        column_values = [
            table.get_column_view(table.domain[meta_var])[0]
            for meta_var in samples
        ]
        class_values = list(map('|'.join, zip(*column_values)))

        # Sort the distinct values: iterating a set of strings gives an
        # order that can differ between interpreter runs, which would make
        # the order of the class values (and their indices) unstable.
        unique_class_values = sorted(set(class_values))
        value_to_index = {
            value: index
            for index, value in enumerate(unique_class_values)
        }
        class_var = DiscreteVariable(name='class',
                                     values=unique_class_values)
        _domain = Domain(table.domain.attributes,
                         table.domain.class_vars + (class_var, ),
                         table.domain.metas)

        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        col[:] = [value_to_index[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table,
                                feature_names_column='sample_id',
                                meta_attr_name='genes')
        table.name = title  # table name is lost after transpose
        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[
            TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[
            TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[
            TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
Пример #8
0
    def commit(self):
        """
        Store the selected rows and output the data for the selected genes.

        Nothing happens when the view has no selection model. The source
        rows of the current selection are always stored in
        `self.selected_rows`. When rows are selected and `self.input_genes`
        is truthy, the gene sets stored under `Qt.UserRole` on the selected
        indexes are merged and the input data is restricted to them:
        by column when `self.use_attr_names` is set, by row otherwise.
        """
        selection_model = self.data_view.selectionModel()
        if not selection_model:
            return

        selection = selection_model.selectedRows(self.COUNT)
        self.selected_rows = [
            self.filter_proxy_model.mapToSource(sel).row()
            for sel in selection
        ]

        if not (selection and self.input_genes):
            return

        gene_sets = [model_index.data(Qt.UserRole)
                     for model_index in selection]
        # Keep the union as a set for the membership tests below; the list
        # form is only needed for the row filter.
        selected_genes = set.union(*gene_sets)
        output_genes = list(selected_genes)
        self.num_of_sel_genes = len(output_genes)
        self.update_info_box()

        if self.use_attr_names:
            selected = [
                column
                for column in self.input_data.domain.attributes
                if self.gene_id_attribute in column.attributes
                and str(column.attributes[self.gene_id_attribute])
                in selected_genes
            ]

            domain = Domain(selected,
                            self.input_data.domain.class_vars,
                            self.input_data.domain.metas)
            new_data = self.input_data.from_table(domain, self.input_data)
            self.Outputs.matched_genes.send(new_data)
        else:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(
                self.gene_id_column, output_genes)
            # apply filter to the data
            data_table = table_filter.Values([only_known])(self.input_data)

            self.Outputs.matched_genes.send(data_table)
Пример #9
0
    def __apply_filters(self, data_table):
        """
        Apply the configured id handling and unknown-gene filtering.

        Returns a `(data_table, gene_id)` pair. `gene_id` is `NCBI_ID` when
        the domain contains it or any variable carries it as an attribute
        key, otherwise `None`; it is replaced by the value returned from
        `__handle_ids` when `self.include_entrez_id` is set.
        """
        # Non-empty if at least one variable has NCBI_ID among its
        # attribute keys.
        ncbi_keys = {
            key
            for variable in data_table.domain[:]
            for key in variable.attributes
            if key == NCBI_ID
        }

        if NCBI_ID in data_table.domain or ncbi_keys:
            gene_id = NCBI_ID
        else:
            gene_id = None

        if self.include_entrez_id:
            data_table, gene_id = self.__handle_ids(data_table)

        if not self.filter_unknown:
            return data_table, gene_id

        known_input_genes = [
            gene.input_name
            for gene in self.gene_matcher.get_known_genes()
        ]

        if self.use_attr_names:
            # Genes are columns: keep only attributes named after a
            # known gene.
            kept_attributes = [
                attr for attr in data_table.domain.attributes
                if attr.name in known_input_genes
            ]
            temp_domain = Domain(kept_attributes,
                                 metas=data_table.domain.metas,
                                 class_vars=data_table.domain.class_vars)
            data_table = data_table.transform(temp_domain)
        else:
            # Genes are rows: keep rows whose value in the selected gene
            # column is a known gene.
            only_known = table_filter.FilterStringList(
                self.selected_gene_col, known_input_genes)
            data_table = table_filter.Values([only_known])(data_table)

        return data_table, gene_id
Пример #10
0
def dataset_download(gds_id, samples=None, transpose=False, callback=None):
    """
    Load the table stored as ``'<gds_id>.tab'`` and annotate it.

    Parameters
    ----------
    gds_id : str
        Identifier used to build the file name ``'<gds_id>.tab'``.
    samples : dict, optional
        Maps a column name to the list of values to keep in that column.
        When given, the table is filtered accordingly and a discrete class
        variable named ``'class'`` is appended; its values are the
        ``'|'``-joined values of the listed columns.
    transpose : bool
        When true, the table is transposed, the ``'Entrez ID'`` meta column
        is rebuilt as a string variable and the annotations are set for the
        transposed layout.
    callback : callable, optional
        Passed to ``local_files.localpath_download`` and additionally
        called without arguments after loading and once more before
        returning.

    Returns
    -------
    The resulting table, with ``attributes`` set to the annotation dict.
    """
    file_name = '{}.tab'.format(gds_id)
    file_path = local_files.localpath_download(file_name, extract=True, callback=callback)

    table = Table(file_path)
    title = table.name
    gds_info = local_files.info(file_name)
    table_annotations = {TableAnnotation.tax_id: gds_info['taxid']}

    if callback:
        callback()

    if samples is not None:
        filters = [table_filter.FilterStringList(sample, sample_types) for sample, sample_types in samples.items()]
        table = table_filter.Values(filters)(table)

        column_values = [table.get_column_view(table.domain[meta_var])[0] for meta_var in samples]
        class_values = list(map('|'.join, zip(*column_values)))

        # Sort the distinct values: iterating a set of strings gives an
        # order that can differ between interpreter runs, which would make
        # the order of the class values (and their indices) unstable.
        unique_class_values = sorted(set(class_values))
        value_to_index = {value: index for index, value in enumerate(unique_class_values)}
        class_var = DiscreteVariable(name='class', values=unique_class_values)
        _domain = Domain(table.domain.attributes, table.domain.class_vars + (class_var,), table.domain.metas)

        table = table.transform(_domain)
        col, _ = table.get_column_view(class_var)
        col[:] = [value_to_index[class_val] for class_val in class_values]

    if transpose:
        table = Table.transpose(table, feature_names_column='sample_id', meta_attr_name='genes')

        # When transposing a table, variable.attributes get picked up as numerical values instead of strings.
        # We need to convert from Continuous to StringVariable
        _genes = [
            [str(int(gene)) if not np.isnan(gene) else '?']
            for gene in table.get_column_view('Entrez ID')[0].astype(np.float64)
        ]
        new_var = StringVariable('Entrez ID')
        metas = [var for var in table.domain.metas if var.name != 'Entrez ID'] + [new_var]
        new_domain = Domain(table.domain.attributes, table.domain.class_vars, metas)
        table = table.transform(new_domain)
        table[:, new_var] = _genes

        # table name is lost after transpose
        table.name = title

        table_annotations[TableAnnotation.gene_as_attr_name] = not gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_column] = gds_info[TableAnnotation.gene_id_attribute]
    else:
        table_annotations[TableAnnotation.gene_as_attr_name] = gds_info[TableAnnotation.gene_as_attr_name]
        table_annotations[TableAnnotation.gene_id_attribute] = gds_info[TableAnnotation.gene_id_attribute]

    if callback:
        callback()

    table.attributes = table_annotations
    return table
Пример #11
0
    def commit(self):
        """
        Build a filter from `self.conditions` and send the two outputs.

        Each entry of `self.conditions` is `(attr_name, oper, values)`.
        Entries with a missing value are skipped. "Matching Data" receives
        the rows passing all remaining conditions and "Unmatched Data" the
        rows of the negated filter. Without data, or without any usable
        condition, the input is passed through as matching data and
        `None` is sent as unmatched data.
        """
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    # Incomplete condition: some operand is still empty.
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    # Same rule as for continuous attributes. The previous
                    # check was inverted and skipped the condition exactly
                    # when a value had been entered.
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper == 2:
                        # No explicit value set is passed for operator 2.
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        # Stored values are 1-based indices into
                        # attr.values.
                        values = [attr.values[i - 1] for i in values]
                        if oper == 0:
                            f_values = {values[0]}
                        else:
                            # Every value except the chosen one.
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                    condition = data_filter.FilterDiscrete(
                        attr_index, f_values)
                conditions.append(condition)

            if conditions:
                filters = data_filter.Values(conditions)
                matching_output = filters(self.data)
                # Reuse the same filter object, negated, for the rest.
                filters.negate = True
                non_matching_output = filters(self.data)

            # TODO: the legacy handling of purge_attributes / purge_classes
            # (removal of unused values from both outputs) and the copying
            # of the input table's name to the outputs are not ported.

        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)

        self.update_info(matching_output, self.data_out_rows)
Пример #12
0
    def test_continuous_value_filter_isdefined(self):
        """IsDefined on column 1 keeps rows whose value is not None."""
        condition = filter.FilterContinuous(
            1, filter.FilterContinuous.IsDefined)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [row for row in self.data if row[1] is not None]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #13
0
    def test_continuous_value_filter_not_equal(self):
        """NotEqual 1 on column 0 keeps rows whose value differs from 1."""
        condition = filter.FilterContinuous(
            0, filter.FilterContinuous.NotEqual, 1)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [row for row in self.data if row[0] != 1]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #14
0
    def output_data(self):
        """
        Build a filter from `self.conditions` and send the two outputs.

        Each entry of `self.conditions` is `(attr_name, oper, values)`.
        Entries with a missing value are skipped. "Matching Data" receives
        the rows passing the filter and "Unmatched Data" the rows of the
        negated filter; both get the input table's name when it has one.
        Without data the input is passed through as matching data and
        `None` is sent as unmatched data.

        Raises
        ------
        NotImplementedError
            For operators 2 and 3 on an attribute that is neither
            continuous nor string.
        """
        matching_output = self.data
        non_matching_output = None
        if self.data:
            domain = self.data.domain
            filters = data_filter.Values()
            for attr_name, oper, values in self.conditions:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                if isinstance(attr, ContinuousVariable):
                    # Incomplete condition: some operand is still empty.
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterContinuous(
                        attr_index, oper, *[float(v) for v in values])
                elif isinstance(attr, StringVariable):
                    # Same rule as for continuous attributes. The previous
                    # check was inverted and skipped the condition exactly
                    # when a value had been entered.
                    if any(not v for v in values):
                        continue
                    condition = data_filter.FilterString(
                        attr_index, oper, *[str(v) for v in values])
                else:
                    if oper in [2, 3]:
                        raise NotImplementedError(
                            "subset filters for discrete attributes are not "
                            "implemented yet")
                    elif oper == 4:
                        # No explicit value set is passed for operator 4.
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        # Stored values are 1-based; the filter gets
                        # 0-based value indices.
                        if oper == 0:
                            f_values = {values[0] - 1}
                        else:
                            # Every value index except the chosen one.
                            f_values = set(range(len(attr.values)))
                            f_values.remove(values[0] - 1)
                    condition = data_filter.FilterDiscrete(
                        attr_index, f_values)
                filters.conditions.append(condition)

            matching_output = filters(self.data)
            # Reuse the same filter object, negated, for the rest.
            filters.negate = True
            non_matching_output = filters(self.data)

            if hasattr(self.data, "name"):
                matching_output.name = self.data.name
                non_matching_output.name = self.data.name

            # TODO: the legacy handling of purge_attributes / purge_classes
            # (removal of unused values from both outputs) is not ported.
        self.send("Matching Data", matching_output)
        self.send("Unmatched Data", non_matching_output)
Пример #15
0
    def test_filter_string_not_equal(self):
        """NotEqual 'in' on column -1 keeps rows that differ from 'in'."""
        condition = filter.FilterString(
            -1, filter.FilterString.NotEqual, 'in')
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if row[0] != 'in']

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #16
0
    def test_continuous_value_filter_between(self):
        """Between 1 and 2 on column 0 keeps defined values in [1, 2]."""
        condition = filter.FilterContinuous(
            0, filter.FilterContinuous.Between, 1, 2)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [
            row for row in self.data
            if row[0] is not None and 1 <= row[0] <= 2
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #17
0
    def test_continuous_value_filter_greater(self):
        """Greater 1 on column 0 keeps defined values above 1."""
        condition = filter.FilterContinuous(
            0, filter.FilterContinuous.Greater, 1)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [
            row for row in self.data
            if row[0] is not None and row[0] > 1
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #18
0
    def test_filter_string_list_case_insensitive_data(self):
        """Case-insensitive list ['donec'] matches the rows 'Donec'."""
        condition = filter.FilterStringList(
            -1, ['donec'], case_sensitive=False)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if row[0] in ('Donec',)]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #19
0
    def test_continuous_value_filter_outside(self):
        """Outside 2..3 on column 0 keeps defined values not in [2, 3]."""
        condition = filter.FilterContinuous(
            0, filter.FilterContinuous.Outside, 2, 3)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        expected = [
            row for row in self.data
            if row[0] is not None and not 2 <= row[0] <= 3
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #20
0
    def test_filter_string_is_defined(self):
        """IsDefined on column -1 keeps rows whose value is not None."""
        condition = filter.FilterString(-1, filter.FilterString.IsDefined)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if row[0] is not None]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #21
0
    def test_filter_string_list(self):
        """FilterStringList ['et', 'in'] keeps rows equal to either."""
        condition = filter.FilterStringList(-1, ['et', 'in'])
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if row[0] in ('et', 'in')]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #22
0
    def test_filter_string_ends_with(self):
        """EndsWith 's' on column -1 keeps defined values ending in 's'."""
        condition = filter.FilterString(
            -1, filter.FilterString.EndsWith, 's')
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [
            SqlRowInstance(domain, row)
            for row in self.data
            if row[0] is not None and row[0].endswith('s')
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #23
0
    def test_filter_string_outside(self):
        """Outside 'am'..'di' keeps defined values not in ['am', 'di']."""
        filtered_data = filter.Values(conditions=[
            filter.FilterString(-1, filter.FilterString.Outside, 'am', 'di')
        ])(self.table)
        # The excluded range is inclusive on both ends, as in the
        # continuous Outside test; the previous strict comparison would
        # have wrongly expected rows equal to a bound.
        correct_data = [SqlRowInstance(filtered_data.domain, row)
                        for row in self.data
                        if row[0] is not None
                        and not 'am' <= row[0] <= 'di']

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)
Пример #24
0
    def test_filter_string_contains(self):
        """Contains 'et' on column -1 keeps defined values with 'et'."""
        condition = filter.FilterString(
            -1, filter.FilterString.Contains, 'et')
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [
            SqlRowInstance(domain, row)
            for row in self.data
            if row[0] is not None and 'et' in row[0]
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #25
0
    def test_filter_string_between(self):
        """Between 'a' and 'c' keeps defined values in ['a', 'c']."""
        condition = filter.FilterString(
            -1, filter.FilterString.Between, 'a', 'c')
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [
            SqlRowInstance(domain, row)
            for row in self.data
            if row[0] is not None and 'a' <= row[0] <= 'c'
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #26
0
    def test_filter_string_greater_equal(self):
        """GreaterEqual 'volutpat' keeps defined values >= 'volutpat'."""
        condition = filter.FilterString(
            -1, filter.FilterString.GreaterEqual, 'volutpat')
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [
            SqlRowInstance(domain, row)
            for row in self.data
            if row[0] is not None and row[0] >= 'volutpat'
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #27
0
    def test_filter_string_equal_case_insensitive_value(self):
        """Case-insensitive Equal 'In' is compared with the rows 'in'."""
        condition = filter.FilterString(
            -1, filter.FilterString.Equal, 'In', case_sensitive=False)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [SqlRowInstance(domain, row)
                    for row in self.data if row[0] == 'in']

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #28
0
    def test_filter_string_outside_case_insensitive(self):
        """Case-insensitive Outside 'd'..'k' excludes values in the range."""
        filtered_data = filter.Values(conditions=[
            filter.FilterString(-1, filter.FilterString.Outside, 'd', 'k',
                                case_sensitive=False)
        ])(self.table)
        # The excluded range is inclusive on both ends, as in the
        # continuous Outside test; the previous strict comparison would
        # have wrongly expected rows equal to a bound.
        correct_data = [SqlRowInstance(filtered_data.domain, row)
                        for row in self.data
                        if row[0] is not None
                        and not 'd' <= row[0].lower() <= 'k']

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)
Пример #29
0
    def test_filter_string_contains_case_insensitive_data(self):
        """Case-insensitive Contains 'do' matches on lower-cased values."""
        condition = filter.FilterString(
            -1, filter.FilterString.Contains, 'do', case_sensitive=False)
        values_filter = filter.Values(conditions=[condition])
        filtered_data = values_filter(self.table)

        # Expected rows are wrapped so they compare with the result rows.
        domain = filtered_data.domain
        expected = [
            SqlRowInstance(domain, row)
            for row in self.data
            if row[0] is not None and 'do' in row[0].lower()
        ]

        self.assertEqual(len(filtered_data), len(expected))
        self.assertSequenceEqual(filtered_data, expected)
Пример #30
0
    def test_filter_string_between_case_insensitive_value(self):
        """Case-insensitive Between 'I'..'O' keeps values in ['i', 'o']."""
        filtered_data = filter.Values(conditions=[
            filter.FilterString(-1, filter.FilterString.Between, 'I', 'O',
                                case_sensitive=False)
        ])(self.table)
        # Both bounds are inclusive, as in the case-sensitive Between
        # test; the previous strict lower bound would have wrongly
        # rejected a row equal to 'i'.
        correct_data = [SqlRowInstance(filtered_data.domain, row)
                        for row in self.data
                        if row[0] is not None
                        and 'i' <= row[0].lower() <= 'o']

        self.assertEqual(len(filtered_data), len(correct_data))
        self.assertSequenceEqual(filtered_data, correct_data)