示例#1
0
class OWGenes(OWWidget, ConcurrentWidgetMixin):
    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        return QSize(1280, 960)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box,
            self,
            'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        # separator(output_box)
        # output_box.layout().addWidget(horizontal_line())
        # separator(output_box)
        self.exclude_radio = checkBox(output_box,
                                      self,
                                      'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box,
                                      self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea,
                    self,
                    "auto_commit",
                    "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self.handle_filter_callback)
        # rubber(self.radio_group)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        self.unknown_model = UnknownGeneInfoModel()

        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):

        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))

        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        available_organism = sorted(((tax_id, taxonomy.name(tax_id))
                                     for tax_id in taxonomy.common_taxids()),
                                    key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """ Extract and return gene names from `Orange.data.Table`.
        """
        self.input_genes = []
        if self.input_data:
            if self.use_attr_names:
                self.input_genes = [
                    str(attr.name).strip()
                    for attr in self.input_data.domain.attributes
                ]
            else:
                if self.selected_gene_col is None:
                    self.selected_gene_col = self.gene_column_identifier()

                self.input_genes = [
                    str(e[self.selected_gene_col]) for e in self.input_data
                    if not np.isnan(e[self.selected_gene_col])
                ]

    def _update_gene_matcher(self):
        self.gene_names_from_table()

        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes
        # self.gene_matcher.organism = self.get_selected_organism()

    def get_selected_organism(self):
        return self.organisms[self.selected_organism]

    def _run(self):
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """
        Get most suitable column that stores genes. If there are
        several suitable columns, select the one with most unique
        values. Take the best one.
        """

        # candidates -> (variable, num of unique values)
        candidates = ((col,
                       np.unique(self.input_data.get_column_view(col)[0]).size)
                      for col in self.gene_columns_model
                      if isinstance(col, DiscreteVariable)
                      or isinstance(col, StringVariable))

        best_candidate, _ = sorted(candidates, key=lambda x: x[1])[-1]
        return best_candidate

    def find_genes_location(self):
        """ Try locate the genes in the input data when we first load the data.

            Proposed rules:
                - when no suitable feature names are present, check the columns.
                - find the most suitable column, that is, the one with most unique values.

        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()

    def commit(self):
        selection = self.table_view.selectionModel().selectedRows(
            self.table_model.entrez_column_index)

        selected_genes = [row.data() for row in selection]
        if not len(selected_genes):
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            # Genes are in rows (we have a column with genes).
            if not self.use_attr_names:

                if self.target_database in self.input_data.domain:
                    gene_var = self.input_data.domain[self.target_database]
                    metas = self.input_data.domain.metas
                else:
                    gene_var = StringVariable(self.target_database)
                    metas = self.input_data.domain.metas + (gene_var, )

                domain = Domain(self.input_data.domain.attributes,
                                self.input_data.domain.class_vars, metas)

                table = self.input_data.transform(domain)
                col, _ = table.get_column_view(gene_var)
                col[:] = gene_ids

                # filter selected rows
                selected_genes_set = set(selected_genes)
                selected_rows = [
                    row_index for row_index, row in enumerate(table)
                    if str(row[gene_var]) in selected_genes_set
                ]

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
                table.attributes[GENE_ID_COLUMN] = self.target_database
                table = table[selected_rows] if selected_rows else table

                if self.exclude_unmatched:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(
                        gene_var, known_genes)
                    # apply filter to the data
                    table = table_filter.Values([only_known])(table)

                self.Outputs.data_table.send(table)

            # genes are are in columns (genes are features).
            else:
                domain = self.input_data.domain.copy()
                table = self.input_data.transform(domain)

                for gene in self.gene_matcher.genes:
                    if gene.input_identifier in table.domain:

                        table.domain[gene.input_identifier].attributes[
                            self.target_database] = (str(gene.gene_id)
                                                     if gene.gene_id else '?')

                        if self.replace_id_with_symbol:
                            try:
                                table.domain[gene.input_identifier].name = str(
                                    gene.symbol)
                            except AttributeError:
                                # TODO: missing gene symbol, need to handle this?
                                pass

                # filter selected columns
                selected_genes_set = set(selected_genes)
                selected = [
                    column for column in table.domain.attributes
                    if self.target_database in column.attributes
                    and str(column.attributes[
                        self.target_database]) in selected_genes_set
                ]

                output_attrs = table.domain.attributes

                if selected:
                    output_attrs = selected

                if self.exclude_unmatched:
                    known_genes_set = set(known_genes)
                    output_attrs = [
                        col for col in output_attrs if col.attributes[
                            self.target_database] in known_genes_set
                    ]

                domain = Domain(output_attrs, table.domain.class_vars,
                                table.domain.metas)

                table = table.from_table(domain, table)

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
                table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def toggle_radio_options(self):
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        if self.gene_matcher.genes:
            # enable checkbox if unknown genes are detected
            self.exclude_radio.setEnabled(
                len(self.gene_matcher.genes) != len(
                    self.gene_matcher.get_known_genes()))
            self.exclude_unmatched = len(self.gene_matcher.genes) != len(
                self.gene_matcher.get_known_genes())

    def get_target_ids(self):
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
示例#2
0
class OWGeneNameMatcher(OWWidget):
    name = "Gene Name Matcher"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 5
    want_main_area = True

    use_attr_names = Setting(True)
    selected_organism = Setting(11)

    selected_filter = Setting(0)
    gene_as_attr_name = Setting(0)
    filter_unknown = Setting(True)
    include_entrez_id = Setting(True)
    # include_ensembl_id = Setting(True)
    auto_commit = Setting(True)

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        custom_data_table = Output("Data", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        return QSize(1280, 960)

    def __init__(self):
        super().__init__()
        # ATTRIBUTES #

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []
        self.selected_gene_col = None

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        # progress bar
        self.progress_bar = None

        # filter
        self.filter_labels = ['Unique', 'Partial', 'Unknown']

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n")

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box,
            self,
            'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene names')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, ))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change)

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Use attribute names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change)

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output settings')

        # TODO: will widget support transposing tables?
        # radioButtonsInBox(output_box, self, "gene_as_attr_name", ["Genes in rows", "Genes in columns"],
        # callback=self.on_output_option_change)

        # separator(output_box)
        checkBox(output_box,
                 self,
                 'filter_unknown',
                 'Filter unknown genes',
                 callback=self.on_output_option_change)
        separator(output_box)
        output_box.layout().addWidget(horizontal_line())
        checkBox(output_box,
                 self,
                 'include_entrez_id',
                 'Include Entrez ID',
                 callback=self.on_output_option_change)

        # TODO: provide support for ensembl ids as output option
        # checkBox(output_box, self, 'include_ensembl_id', 'Include Ensembl ID', callback=self.on_output_option_change)

        auto_commit(self.controlArea, self, "auto_commit", label="Commit")

        rubber(self.controlArea)

        # Main area
        filter_box = hBox(self.mainArea, 'Filter results')
        self.radio_group = radioButtons(filter_box,
                                        self,
                                        value='selected_filter',
                                        btnLabels=self.filter_labels,
                                        orientation=Qt.Horizontal,
                                        callback=self.on_filter_changed)
        rubber(self.radio_group)
        self.mainArea.layout().addWidget(filter_box)

        self.proxy_model = FilterProxyModel(self)
        self.extended_view = ExtendedTableView(parent=self)
        self.extended_view.genes_view.setModel(self.proxy_model)
        self.extended_view.genes_selection_model().selectionChanged.connect(
            self.__selection_changed)

        self.mainArea.layout().addWidget(self.extended_view, 1)

    def __reset_widget_state(self):
        self.Outputs.custom_data_table.send(None)
        self.proxy_model.setSourceModel(None)
        self.extended_view.reset_genes_model()
        self.extended_view.reset_info_model()

    def __selection_changed(self):
        genes = [
            model_index.data()
            for model_index in self.extended_view.get_selected_gens()
        ]
        self.extended_view.set_info_model(genes)

    def _update_info_box(self):

        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = 'Genes on input:  {}\n' \
                        'Known genes :    {} ({:.2f} %)\n'.format(num_genes, known_genes, known_genes * 100 / num_genes)

        else:
            info_text = 'No genes on input'

        self.info_box.setText(info_text)

    def _progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def _handle_matcher_results(self):
        assert threading.current_thread() == threading.main_thread()

        if self.progress_bar:
            self.progress_bar.finish()
            self.setStatusMessage('')

        # if no known genes, clean up and return
        if not len(self.gene_matcher.get_known_genes()):
            self._update_info_box()
            self.__reset_widget_state()
            return

        self._update_info_box()
        self.extended_view.set_genes_model(self.gene_matcher.genes)
        self.proxy_model.setSourceModel(self.extended_view.genes_model)
        self.extended_view.genes_view.resizeRowsToContents()
        self.commit()

    def get_available_organisms(self):
        available_organism = sorted([(tax_id, taxonomy.name(tax_id))
                                     for tax_id in taxonomy.common_taxids()],
                                    key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """ Extract and return gene names from `Orange.data.Table`.
        """
        self.input_genes = []
        if self.input_data:
            if self.use_attr_names:
                self.input_genes = [
                    str(attr.name).strip()
                    for attr in self.input_data.domain.attributes
                ]
            elif self.selected_gene_col:
                if self.selected_gene_col in self.input_data.domain:
                    self.input_genes = [
                        str(e[self.selected_gene_col]) for e in self.input_data
                        if not np.isnan(e[self.selected_gene_col])
                    ]

    def _update_gene_matcher(self):
        self.gene_names_from_table()

        if not self.input_genes:
            self._update_info_box()

        if not self.gene_matcher:
            self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                            case_insensitive=True)

        self.gene_matcher.genes = self.input_genes
        self.gene_matcher.organism = self.get_selected_organism()

    def get_selected_organism(self):
        return self.organisms[self.selected_organism]

    def match_genes(self):
        if self.gene_matcher:
            # init progress bar
            self.progress_bar = ProgressBar(self,
                                            iterations=len(
                                                self.gene_matcher.genes))
            # status message
            self.setStatusMessage('Gene matcher running')

            worker = Worker(self.gene_matcher.run_matcher,
                            progress_callback=True)
            worker.signals.progress.connect(self._progress_advance)
            worker.signals.finished.connect(self._handle_matcher_results)

            # move download process to worker thread
            self.threadpool.start(worker)

    def on_input_option_change(self):
        self.__reset_widget_state()
        self._update_gene_matcher()
        self.match_genes()

    @Inputs.data_table
    def handle_input(self, data):
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)

        if data:
            self.input_data = data

            self.gene_columns_model.set_domain(self.input_data.domain)

            if self.gene_columns_model:
                self.selected_gene_col = self.gene_columns_model[0]

            self.tax_id = str(self.input_data.attributes.get(TAX_ID, ''))
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            if self.tax_id in self.organisms:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.on_input_option_change()

    @staticmethod
    def get_gene_id_identifier(gene_id_strings):
        # type: (Set[str]) -> str

        if not len(gene_id_strings):
            return NCBI_ID

        regex = re.compile(r'Entrez ID \(.*?\)')
        filtered = filter(regex.search, gene_id_strings)

        return NCBI_ID + ' ({})'.format(len(set(filtered)) + 1)

    def __handle_ids(self, data_table):
        """
        If 'use_attr_names' is True, genes from the input data are in columns.
        """
        if self.use_attr_names:
            # set_of_attributes = set([key for attr in data_table.domain[:] for key in attr.attributes.keys()
            # if key.startswith(NCBI_ID)])
            # gene_id = self.get_gene_id_identifier(set_of_attributes)
            gene_id = NCBI_ID

            for gene in self.gene_matcher.genes:
                if gene.ncbi_id:
                    data_table.domain[
                        gene.input_name].attributes[gene_id] = str(
                            gene.ncbi_id)
        else:
            set_of_variables = set([
                var.name for var in data_table.domain.variables +
                data_table.domain.metas if var.name.startswith(NCBI_ID)
            ])

            gene_id = self.get_gene_id_identifier(set_of_variables)

            temp_domain = Domain([], metas=[StringVariable(gene_id)])
            temp_data = [[str(gene.ncbi_id) if gene.ncbi_id else '?']
                         for gene in self.gene_matcher.genes]
            temp_table = Table(temp_domain, temp_data)

            # if columns differ, then concatenate.
            if NCBI_ID in data_table.domain:
                if gene_id != NCBI_ID and not np.array_equal(
                        np.array(temp_data).ravel(),
                        data_table.get_column_view(NCBI_ID)[0]):

                    data_table = Table.concatenate([data_table, temp_table])
                else:
                    gene_id = NCBI_ID
            else:
                data_table = Table.concatenate([data_table, temp_table])

        return data_table, gene_id

    def __apply_filters(self, data_table):
        set_of_attributes = set([
            key for attr in data_table.domain[:]
            for key in attr.attributes.keys() if key == NCBI_ID
        ])

        gene_id = NCBI_ID if NCBI_ID in data_table.domain or set_of_attributes else None

        if self.include_entrez_id:
            data_table, gene_id = self.__handle_ids(data_table)

        if self.filter_unknown:
            known_input_genes = [
                gene.input_name
                for gene in self.gene_matcher.get_known_genes()
            ]

            if self.use_attr_names:
                temp_domain = Domain([
                    attr for attr in data_table.domain.attributes
                    if attr.name in known_input_genes
                ],
                                     metas=data_table.domain.metas,
                                     class_vars=data_table.domain.class_vars)
                data_table = data_table.transform(temp_domain)
            else:

                # create filter from selected column for genes
                only_known = table_filter.FilterStringList(
                    self.selected_gene_col, known_input_genes)
                # apply filter to the data
                data_table = table_filter.Values([only_known])(data_table)

        return data_table, gene_id

    def commit(self):
        self.Outputs.custom_data_table.send(None)

        if not self.input_data:
            return

        if not self.use_attr_names and not self.gene_columns_model:
            return

        output_data_table = self.input_data.transform(
            self.input_data.domain.copy())
        output_data_table, gene_id = self.__apply_filters(
            output_data_table.copy())

        # handle table attributes
        output_data_table.attributes[TAX_ID] = self.get_selected_organism()
        output_data_table.attributes[GENE_AS_ATTRIBUTE_NAME] = bool(
            self.use_attr_names)

        if not bool(self.use_attr_names):
            output_data_table.attributes[GENE_ID_COLUMN] = gene_id
        else:
            output_data_table.attributes[GENE_ID_ATTRIBUTE] = gene_id

        self.Outputs.custom_data_table.send(output_data_table)
        # gene_objs = [self.proxy_model.index(row, 0).data() for row in range(self.proxy_model.rowCount())]

    def on_output_option_change(self):
        self.commit()

    def on_filter_changed(self):
        self.proxy_model.invalidateFilter()
        self.extended_view.genes_view.resizeRowsToContents()