class OWConcordance(OWWidget):
    """Widget that lists the contexts (concordances) of a query word.

    The query word is either typed into the line edit or received on the
    "Query Word" input; in the latter case the line edit is disabled.
    Selecting rows in the table outputs the documents they belong to.
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        return QSize(600, 400)

    def set_width(self):
        """Push the context width to the model, then re-apply the selection
        that was active before the change."""
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the sorted, de-duplicated selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(
            {cell.row() for cell in selection.indexes()})
        self.commit()

    def set_selection(self, selection):
        """Select the given rows in the view; an empty value is a no-op."""
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # A word supplied through the input signal must not be overwritten
        # by the stored context.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current query word: reset the selection, update the
        model and the info labels, and commit."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give the two context columns (0 and 2) equal widths around the
        query-word column (1)."""
        # Integer division: QTableView.setColumnWidth takes ints, and PyQt
        # rejects floats on Python >= 3.10 (no implicit float -> int).
        col_width = (self.conc_view.width()
                     - self.conc_view.columnWidth(1)) // 2 - 12
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the table to its contents and refresh the info labels."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents of the selected rows and the concordances."""
        # The first element of a word_index entry is used as the document
        # index into the corpus.
        selected_docs = sorted(
            {self.model.word_index[row][0] for row in self.selected_rows})
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
class OWClusterAnalysis(OWWidget):
    """Widget that scores genes and gene sets per cluster.

    Clusters are defined by one or more categorical "cluster indicator"
    variables of the input data. Selected clusters are sent to the
    outputs together with their filtered gene and gene-set scores.
    """

    name = "Cluster Analysis"
    description = ("The widget displays differentially expressed genes that characterize the cluster, "
                   "and corresponding gene terms that describe differentially expressed genes")
    icon = "../widgets/icons/OWClusterAnalysis.svg"
    priority = 100

    class Inputs:
        data_table = Input('Data', Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        selected_data = Output('Selected Data', Table)
        gene_scores = Output('Gene Scores', Table)
        gene_set_scores = Output('Gene Set Scores', Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        gene_enrichment = Msg('{}, {}.')
        no_selected_gene_sets = Msg(
            'No gene set selected, select them from Gene Sets box.')

    class Error(OWWidget.Error):
        no_cluster_indicator = Msg('No cluster indicator in the input data')
        gene_as_attributes = Msg(
            'Genes, in the input data, are expected as column names')
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        cluster_batch_conflict = Msg(
            'Cluster and batch must not be the same variable')

    settingsHandler = ClusterAnalysisContextHandler()
    cluster_indicators = ContextSetting([])
    batch_indicator = ContextSetting(None)
    stored_gene_sets_selection = ContextSetting(tuple())

    scoring_method_selection = ContextSetting(0)
    scoring_method_design = ContextSetting(0)
    scoring_test_type = ContextSetting(0)

    # genes filter
    max_gene_count = Setting(20)
    use_gene_count_filter = Setting(True)

    max_gene_p_value = Setting(0.1)
    use_gene_pval_filter = Setting(False)

    max_gene_fdr = Setting(0.1)
    use_gene_fdr_filter = Setting(True)

    # gene sets filter
    min_gs_count = Setting(5)
    use_gs_count_filter = Setting(True)

    max_gs_p_value = Setting(0.1)
    use_gs_pval_filter = Setting(False)

    max_gs_fdr = Setting(0.1)
    use_gs_max_fdr = Setting(True)

    # auto commit results
    auto_commit = settings.Setting(False)

    custom_gene_set_indicator = settings.Setting(None)

    def __init__(self):
        super().__init__()

        # widget attributes
        self.input_data = None
        self.store_input_domain = None
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None

        # custom gene set input
        self.feature_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, StringVariable))
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        self.rows_by_cluster = None
        self.rows_by_batch = None
        self.clusters = []
        self.new_cluster_profile = []
        # Defined here so that methods reading it never hit an
        # AttributeError before the first data set arrives.
        self.cluster_var = None

        # data model
        self.cluster_info_model = None

        # Info
        info_box = vBox(self.controlArea, 'Info')
        self.input_info = widgetLabel(info_box)

        # Cluster selection
        self.cluster_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False)
        self.cluster_indicator_box = widgetBox(self.controlArea,
                                               'Cluster Indicator')
        self.cluster_indicator_view = listView(
            self.cluster_indicator_box, self, 'cluster_indicators',
            model=self.cluster_indicator_model,
            selectionMode=QListWidget.MultiSelection,
            callback=self.invalidate,
            sizeHint=QSize(256, 70))

        # Batch selection
        self.batch_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False,
            placeholder="")
        box = widgetBox(self.controlArea, 'Batch Indicator')
        self.batch_indicator_combobox = comboBox(
            box, self, 'batch_indicator',
            model=self.batch_indicator_model,
            sendSelectedValue=True,
            callback=self.batch_indicator_changed)

        # Gene scoring
        box = widgetBox(self.controlArea, 'Gene Scoring')
        self.gene_scoring = GeneScoringWidget(box, self)
        self.gene_scoring.set_method_selection_area('scoring_method_selection')
        self.gene_scoring.set_method_design_area('scoring_method_design')
        self.gene_scoring.set_test_type('scoring_test_type')

        # Gene Sets widget
        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                           'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
            self.__gene_sets_enrichment)

        # custom gene sets area
        box = vBox(self.controlArea, "Custom Gene Sets")

        if self.custom_gene_set_indicator not in self.feature_model:
            self.custom_gene_set_indicator = None

        self.gs_label_combobox = comboBox(
            box, self, "custom_gene_set_indicator",
            sendSelectedValue=True,
            model=self.feature_model,
            callback=self.handle_custom_gene_sets)
        self.gs_label_combobox.setDisabled(True)

        # main area
        splitter = QSplitter(Qt.Horizontal, self.mainArea)
        self.mainArea.layout().addWidget(splitter)

        genes_filter = widgetBox(splitter, 'Filter Genes',
                                 orientation=QHBoxLayout())
        spin(genes_filter, self, 'max_gene_count', 0, 10000,
             label='Count',
             tooltip='Minimum genes count',
             checked='use_gene_count_filter',
             callback=self.filter_genes,
             callbackOnReturn=True,
             checkCallback=self.filter_genes)

        doubleSpin(genes_filter, self, 'max_gene_p_value', 0.0, 1.0, 0.0001,
                   label='p-value',
                   tooltip='Maximum p-value of the enrichment score',
                   checked='use_gene_pval_filter',
                   callback=self.filter_genes,
                   callbackOnReturn=True,
                   checkCallback=self.filter_genes)

        doubleSpin(genes_filter, self, 'max_gene_fdr', 0.0, 1.0, 0.0001,
                   label='FDR',
                   tooltip='Maximum false discovery rate',
                   checked='use_gene_fdr_filter',
                   callback=self.filter_genes,
                   callbackOnReturn=True,
                   checkCallback=self.filter_genes)

        gene_sets_filter = widgetBox(splitter, 'Filter Gene Sets',
                                     orientation=QHBoxLayout())
        spin(gene_sets_filter, self, 'min_gs_count', 0,
             DISPLAY_GENE_SETS_COUNT,
             label='Count',
             tooltip='Minimum genes count',
             checked='use_gs_count_filter',
             callback=self.filter_gene_sets,
             callbackOnReturn=True,
             checkCallback=self.filter_gene_sets)

        doubleSpin(gene_sets_filter, self, 'max_gs_p_value', 0.0, 1.0, 0.0001,
                   label='p-value',
                   tooltip='Maximum p-value of the enrichment score',
                   checked='use_gs_pval_filter',
                   callback=self.filter_gene_sets,
                   callbackOnReturn=True,
                   checkCallback=self.filter_gene_sets)

        doubleSpin(gene_sets_filter, self, 'max_gs_fdr', 0.0, 1.0, 0.0001,
                   label='FDR',
                   tooltip='Maximum false discovery rate',
                   checked='use_gs_max_fdr',
                   callback=self.filter_gene_sets,
                   callbackOnReturn=True,
                   checkCallback=self.filter_gene_sets)

        self.cluster_info_view = QTableView()
        self.cluster_info_view.verticalHeader().setVisible(False)
        self.cluster_info_view.setItemDelegate(HTMLDelegate())
        self.cluster_info_view.horizontalHeader().hide()
        self.cluster_info_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit",
                    box=False)

        self.mainArea.layout().addWidget(self.cluster_info_view)

    def sizeHint(self):
        return QSize(800, 600)

    def __update_info_box(self):
        """Rebuild the text of the Info box from the current inputs."""
        info_string = ''
        if self.input_genes_ids:
            info_string += '{} samples, {} clusters\n'.format(
                self.input_data.X.shape[0],
                len(self.clusters) if self.clusters else '?')
            info_string += '{:,d} unique genes\n'.format(
                len(self.input_genes_ids))
        else:
            info_string += 'No genes on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(
                self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def __set_cluster_info_model(self):
        """Create a fresh cluster model and attach it to the view."""
        self.cluster_info_view.setModel(None)

        self.cluster_info_model = ClusterModel(self)
        self.cluster_info_model.add_rows(self.clusters)

        # add model to the view
        self.cluster_info_view.setModel(self.cluster_info_model)
        # call sizeHint function
        self.cluster_info_view.resizeRowsToContents()
        self.cluster_info_view.selectionModel().selectionChanged.connect(
            self.commit)

    def __create_temp_class_var(self):
        """Combine the selected cluster indicators into one variable.

        Every combination of indicator values becomes one value of a new
        discrete meta variable named 'Cluster indicators'. The variable is
        appended to ``self.input_data`` (which is replaced by the
        transformed table) and returned. The value-index profile of each
        combination is appended to ``self.new_cluster_profile``.
        """
        # NOTE(review): self.input_data is replaced by the extended table,
        # so repeated calls appear to stack 'Cluster indicators' metas -
        # confirm whether that is intended.
        cluster_indicator_name = 'Cluster indicators'

        new_cluster_values = []
        # Pair every value with its index *within its own variable*. A
        # single value -> index lookup shared by all indicators is wrong as
        # soon as two indicators use the same value name at different
        # positions.
        indexed_values = [list(enumerate(var.values))
                          for var in self.cluster_indicators]
        for comb in itertools.product(*indexed_values):
            new_cluster_values.append(', '.join(val for _, val in comb))
            self.new_cluster_profile.append([idx for idx, _ in comb])

        row_profile_lookup = {
            tuple(profile): indx
            for indx, profile in enumerate(self.new_cluster_profile)}

        # One row per indicator, one column per data instance.
        row_profile = np.vstack([
            np.asarray(self.input_data.get_column_view(var)[0], dtype=int)
            for var in self.cluster_indicators])

        ca_ind = DiscreteVariable.make(
            cluster_indicator_name,
            values=list(new_cluster_values),
            ordered=True)

        domain = Domain(self.input_data.domain.attributes,
                        self.input_data.domain.class_vars,
                        self.input_data.domain.metas + (ca_ind, ))

        table = self.input_data.transform(domain)
        table[:, ca_ind] = np.array(
            [[row_profile_lookup[tuple(row_profile[:, i])]]
             for i in range(row_profile.shape[1])])
        self.input_data = table
        return ca_ind

    def __set_clusters(self):
        """Derive the cluster variable and Cluster objects from the
        selected indicators."""
        self.clusters = []
        self.new_cluster_profile = []
        self.cluster_var = None

        if self.cluster_indicators and self.input_data:
            if isinstance(self.cluster_indicators, list) \
                    and len(self.cluster_indicators) > 1:
                self.cluster_var = self.__create_temp_class_var()
            else:
                self.cluster_var = self.cluster_indicators[0]

            self.rows_by_cluster = np.asarray(
                self.input_data.get_column_view(self.cluster_var)[0],
                dtype=int)
            for index, name in enumerate(self.cluster_var.values):
                cluster = Cluster(name, index)
                self.clusters.append(cluster)
                cluster.set_genes(self.input_genes_names,
                                  self.input_genes_ids)

    def __set_batch(self):
        """Read the batch column, or flag a conflict with the cluster
        variable."""
        self.Error.cluster_batch_conflict.clear()
        self.rows_by_batch = None

        # Only an actually selected batch variable can conflict; without
        # the None check "no batch" and "no cluster" compared equal and
        # raised a bogus error.
        if self.batch_indicator is not None \
                and self.batch_indicator == self.cluster_var:
            self.Error.cluster_batch_conflict()
            return
        if self.batch_indicator and self.input_data:
            self.rows_by_batch = np.asarray(
                self.input_data.get_column_view(self.batch_indicator)[0],
                dtype=int)

    def __set_genes(self):
        """Collect gene names and ids from the input data attributes."""
        self.input_genes_names = []
        self.input_genes_ids = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes_names.append(str(variable.name))
                self.input_genes_ids.append(
                    str(variable.attributes.get(self.gene_id_attribute,
                                                np.nan)))

    def filter_genes(self):
        if self.cluster_info_model:
            # filter genes
            # note: after gene filter is applied, we need to recalculate
            #       gene set enrichment
            self.cluster_info_model.apply_gene_filters(
                self.max_gene_p_value if self.use_gene_pval_filter else None,
                self.max_gene_fdr if self.use_gene_fdr_filter else None,
                self.max_gene_count if self.use_gene_count_filter else None)

            # recalculate gene set enrichment
            self.__gene_sets_enrichment()
            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

            # commit changes after filter
            self.commit()

    def filter_gene_sets(self):
        if self.cluster_info_model:
            # filter gene sets
            self.cluster_info_model.apply_gene_sets_filters(
                self.max_gs_p_value if self.use_gs_pval_filter else None,
                self.max_gs_fdr if self.use_gs_max_fdr else None,
                self.min_gs_count if self.use_gs_count_filter else None)

            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

    def __gene_enrichment(self):
        """Score genes with the selected method; a ValueError is shown as
        a warning instead of being raised."""
        # if true cluster vs. cluster else cluster vs rest
        design = bool(self.gene_scoring.get_selected_desig())
        test_type = self.gene_scoring.get_selected_test_type()
        method = self.gene_scoring.get_selected_method()
        try:
            if method.score_function == score_hypergeometric_test:
                values = set(np.unique(self.input_data.X))
                if (0 not in values) or (len(values) != 2):
                    raise ValueError('Binary data expected (use Preprocess)')

            self.cluster_info_model.score_genes(
                design=design,
                table_x=self.input_data.X,
                rows_by_cluster=self.rows_by_cluster,
                rows_by_batch=self.rows_by_batch,
                method=method,
                alternative=test_type)
        except ValueError as e:
            self.Warning.gene_enrichment(str(e), 'p-values are set to 1')

    def __gene_sets_enrichment(self):
        """Run gene-set enrichment for the selected hierarchies and apply
        the gene-set filters."""
        # The model does not exist when the input data was rejected (error
        # paths of handle_input); there is nothing to enrich then.
        if self.input_data and self.cluster_info_model is not None:
            self.Warning.no_selected_gene_sets.clear()
            all_sets = self.gs_widget.get_hierarchies()
            selected_sets = self.gs_widget.get_hierarchies(only_selected=True)

            if len(selected_sets) == 0 and len(all_sets) > 0:
                self.Warning.no_selected_gene_sets()

            # save setting on selected hierarchies
            self.stored_gene_sets_selection = tuple(selected_sets)
            ref_genes = set(self.input_genes_ids)

            # TODO: possible exceptions? They propagate to the caller.
            self.cluster_info_model.gene_sets_enrichment(
                self.gs_widget.gs_object, selected_sets, ref_genes)

            self.filter_gene_sets()

    def invalidate(self, cluster_init=True):
        """Recompute clusters (optionally), batch, model and gene scores.

        :param cluster_init: when False the existing clusters are reused.
        """
        if self.input_data is not None and self.tax_id is not None:
            self.Warning.gene_enrichment.clear()

            if self.cluster_info_model is not None:
                self.cluster_info_model.cancel()

            self.__set_genes()
            if cluster_init:
                self.__set_clusters()
            self.__set_batch()
            self.__set_cluster_info_model()

            # note: when calling self.__gene_enrichment we calculate gse
            #       automatically. No need to call
            #       self.__gene_sets_enrichment here
            self.__gene_enrichment()
            self.__update_info_box()

    def batch_indicator_changed(self):
        self.invalidate(cluster_init=False)

    @Inputs.data_table
    def handle_input(self, data):
        self.closeContext()
        self.Warning.clear()
        self.Error.clear()

        self.input_data = None
        self.store_input_domain = None
        self.stored_gene_sets_selection = tuple()
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.clusters = None

        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.cluster_info_view.setModel(None)

        self.cluster_indicators = []
        self.cluster_var = None
        self.batch_indicator = None
        self.cluster_indicator_model.set_domain(None)
        self.batch_indicator_model.set_domain(None)

        self.__update_info_box()

        if data:
            self.input_data = data

            self.cluster_indicator_model.set_domain(self.input_data.domain)
            self.batch_indicator_model.set_domain(self.input_data.domain)

            # For Cluster Indicator do not use categorical variables that
            # contain only one value.
            self.cluster_indicator_model.wrap([
                item for item in self.cluster_indicator_model
                if len(item.values) > 1
            ])
            # First value in batch indicator model is a NoneType,
            # we can skip it when we validate categorical variables
            self.batch_indicator_model.wrap(self.batch_indicator_model[:1] + [
                item for item in self.batch_indicator_model[1:]
                if len(item.values) > 1
            ])

            self.tax_id = self.input_data.attributes.get(TAX_ID, None)
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)

            if not self.cluster_indicator_model:
                self.Error.no_cluster_indicator()
                return
            elif not self.use_attr_names:
                self.Error.gene_as_attributes()
                return

            self.openContext(self.input_data.domain)

            self.gs_widget.load_gene_sets(self.tax_id)
            if self.cluster_indicator_model \
                    and len(self.cluster_indicators) < 1:
                self.cluster_indicators = [self.cluster_indicator_model[0]]
            if self.batch_indicator_model and self.batch_indicator is None:
                self.batch_indicator = self.batch_indicator_model[0]

            self.invalidate()

            if self.custom_data:
                self.refresh_custom_gene_sets()
                self._handle_future_model()
                self.handle_custom_gene_sets()

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        self.Error.clear()
        self.Warning.clear()
        self.closeContext()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            # Keep a missing taxonomy id as None; str(None) would turn it
            # into the string 'None' and trigger a false organism mismatch.
            custom_tax_id = self.custom_data.attributes.get(TAX_ID, None)
            self.custom_tax_id = (None if custom_tax_id is None
                                  else str(custom_tax_id))
            self.custom_use_attr_names = self.custom_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(
                GENE_ID_COLUMN, None)

            self._handle_future_model()

        if self.input_data:
            self.openContext(self.input_data.domain)

        self.gs_label_combobox.setDisabled(True)
        self.refresh_custom_gene_sets()
        self.handle_custom_gene_sets(select_customs_flag=True)

    def __check_organism_mismatch(self):
        """
        Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            # Compare as strings so that an int id on one input and a str
            # id on the other are not reported as different organisms.
            return str(self.tax_id) != str(self.custom_tax_id)
        return False

    def _handle_future_model(self):
        """Point ``custom_gene_set_indicator`` at a variable of the feature
        model: the matching one if present, else the first, else None."""
        if self.custom_gene_set_indicator in self.feature_model:
            index = self.feature_model.indexOf(self.custom_gene_set_indicator)
            self.custom_gene_set_indicator = self.feature_model[index]
        elif self.feature_model:
            self.custom_gene_set_indicator = self.feature_model[0]
        else:
            self.custom_gene_set_indicator = None

    def handle_custom_gene_sets(self, select_customs_flag=False):
        """Register the custom gene sets with the gene-set widget and
        recompute the enrichment.

        :param select_customs_flag: forwarded to ``add_custom_sets``.
        """
        can_add_sets = (self.custom_gene_set_indicator
                        and self.custom_data is not None
                        and self.custom_gene_id_column is not None)

        if not can_add_sets:
            # No custom sets will be added: refresh the hierarchy so that
            # it reflects the sets that are currently loaded.
            self.gs_widget.update_gs_hierarchy()
        elif self.__check_organism_mismatch():
            self.gs_label_combobox.setDisabled(True)
            self.Error.organism_mismatch()
            self.gs_widget.update_gs_hierarchy()
            self.__gene_sets_enrichment()
            return
        else:
            if isinstance(self.custom_gene_set_indicator, DiscreteVariable):
                labels = self.custom_gene_set_indicator.values
                gene_sets_names = [
                    labels[int(idx)]
                    for idx in self.custom_data.get_column_view(
                        self.custom_gene_set_indicator)[0]
                ]
            else:
                gene_sets_names, _ = self.custom_data.get_column_view(
                    self.custom_gene_set_indicator)

            self.num_of_custom_sets = len(set(gene_sets_names))
            gene_names, _ = self.custom_data.get_column_view(
                self.custom_gene_id_column)
            hierarchy_title = (self.custom_data.name
                               if self.custom_data.name
                               else 'Custom sets', )
            try:
                self.gs_widget.add_custom_sets(
                    gene_sets_names, gene_names,
                    hierarchy_title=hierarchy_title,
                    select_customs_flag=select_customs_flag)
            except GeneSetException:
                # Deliberately best-effort, as in the original code.
                pass
            self.gs_label_combobox.setDisabled(False)

        self.__gene_sets_enrichment()
        self.__update_info_box()

    def refresh_custom_gene_sets(self):
        self.gs_widget.clear_custom_sets()
        # self.gs_widget.update_gs_hierarchy()

    def _profile_columns(self, cluster, length):
        """Return the leading output columns for one cluster.

        The first column repeats the cluster index (class value); when
        several indicators were combined, one column per indicator value
        index follows.
        """
        profiles = [[cluster.index] * length]
        if self.new_cluster_profile:
            profiles.extend(
                [p] * length
                for p in self.new_cluster_profile[cluster.index])
        return profiles

    def gene_scores_output(self, selected_clusters):
        """Send the filtered gene scores of the selected clusters."""
        metas = [
            StringVariable('Gene'),
            StringVariable(NCBI_ID),
            StringVariable('Rank'),
            ContinuousVariable('Statistic score'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR')
        ]

        if self.new_cluster_profile:
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            genes = cluster.filtered_genes
            scores = [gene.score for gene in genes]
            p_vals = [gene.p_val for gene in genes]
            fdr_vals = [gene.fdr for gene in genes]
            gene_names = [gene.input_name for gene in genes]
            gene_ids = [gene.ncbi_id for gene in genes]
            rank = rankdata(p_vals, method='min')

            profiles = self._profile_columns(cluster, len(genes))
            data.extend(
                list(row)
                for row in zip(*profiles, gene_names, gene_ids, rank,
                               scores, p_vals, fdr_vals))

        out_data = Table(domain, data)
        out_data.attributes[TAX_ID] = self.tax_id
        out_data.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        out_data.attributes[GENE_ID_COLUMN] = NCBI_ID
        self.Outputs.gene_scores.send(out_data)

    def gene_set_scores_output(self, selected_clusters):
        """Send the filtered gene-set scores of the selected clusters."""
        metas = [
            StringVariable('Term'),
            StringVariable('Term ID'),
            StringVariable('Rank'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR')
        ]

        if self.new_cluster_profile:
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            gene_sets = cluster.filtered_gene_sets
            p_vals = [gs.p_val for gs in gene_sets]
            fdr_vals = [gs.fdr for gs in gene_sets]
            gs_names = [gs.name for gs in gene_sets]
            gs_ids = [gs.gs_id for gs in gene_sets]
            rank = rankdata(p_vals, method='min')

            profiles = self._profile_columns(cluster, len(gene_sets))
            data.extend(
                list(row)
                for row in zip(*profiles, gs_names, gs_ids, rank,
                               p_vals, fdr_vals))

        self.Outputs.gene_set_scores.send(Table(domain, data))

    def commit(self):
        """Send the data and scores of the selected clusters.

        Without input data or without a selection, all three outputs are
        cleared.
        """
        # The view has no selection model until a model has been set.
        selection_model = self.cluster_info_view.selectionModel()
        selected_rows = (selection_model.selectedRows()
                         if selection_model is not None else [])

        if not self.input_data or not selected_rows:
            # Clear every output, otherwise the score outputs keep showing
            # the previously selected clusters.
            self.Outputs.selected_data.send(None)
            self.Outputs.gene_scores.send(None)
            self.Outputs.gene_set_scores.send(None)
            return

        selected_clusters = []
        selected_cluster_indexes = set()
        selected_cluster_genes = set()
        for sel_row in selected_rows:
            cluster = sel_row.data()
            selected_clusters.append(cluster)
            selected_cluster_indexes.add(cluster.index)
            selected_cluster_genes.update(
                gene.ncbi_id for gene in cluster.filtered_genes)

        # get columns of selected clusters
        selected_columns = [
            column for column in self.input_data.domain.attributes
            if self.gene_id_attribute in column.attributes
            and str(column.attributes[self.gene_id_attribute])
            in selected_cluster_genes
        ]

        domain = Domain(selected_columns,
                        self.input_data.domain.class_vars,
                        self.input_data.domain.metas)
        output_data = self.input_data.from_table(domain, self.input_data)

        # get rows of selected clusters
        row_indexes = [
            row_index
            for row_index, col_index in enumerate(self.rows_by_cluster)
            if col_index in selected_cluster_indexes
        ]

        # send to output signal
        self.Outputs.selected_data.send(output_data[row_indexes])
        self.gene_scores_output(selected_clusters)
        self.gene_set_scores_output(selected_clusters)
class OWCreateInstance(OWWidget):
    """Widget for interactively composing a single data instance.

    Every variable of the input data gets one row in a filterable table
    where its value can be edited. The created instance is sent to the
    output, optionally appended to the input data.
    """

    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    ACTIONS = ["median", "mean", "random", "input"]
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    # Column indices addressable by tag: Header.name == 0, Header.variable == 1
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(self.Header.variable,
                                           VariableDelegate(self))
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(False)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            gui.button(
                box, self, name.capitalize(),
                # bind the current name as a default to avoid late binding
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None, self, "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the per-variable actions menu for the row under `point`."""
        index = self.view.indexAt(point)
        # A click outside of any row yields an invalid index whose model()
        # is None; there is no variable to act on.
        if not index.isValid():
            return
        model: QSortFilterProxyModel = index.model()
        source_index = model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """Create one action per entry of ACTIONS, applied to `index` only.

        `index` must be an index of the source model.
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str,
                           indices: Optional[List[QModelIndex]] = None):
        """Set variable values with the method named by `fun`.

        :param fun: one of ACTIONS ("median", "mean", "random", "input")
        :param indices: source-model indices of the rows to change; when
            None, the rows currently exposed by the proxy model are changed
        """
        cont_fun = {"median": np.nanmedian,
                    "mean": np.nanmean,
                    "random": cont_random,
                    "input": np.nanmean}.get(fun, NotImplemented)
        disc_fun = {"median": majority,
                    "mean": majority,
                    "random": disc_random,
                    "input": majority}.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        if indices is None:
            # Proxy rows differ from source rows once the table is filtered
            # or sorted, so translate them before indexing the source model.
            proxy = self.proxy_model
            rows = [proxy.mapToSource(proxy.index(row, 0)).row()
                    for row in range(proxy.rowCount())]
        else:
            rows = [index.row() for index in indices]

        # Suppress per-cell commits while updating; commit once at the end.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if all(np.isnan(values)):
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            # Reconnect even on failure, otherwise later edits would never
            # be committed.
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Repopulate the table model from the input data."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        self.values = {}
        # Stretching is turned off while resizing to contents and restored
        # afterwards.
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Send the created instance (optionally appended to the input)."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Build a one-row table in the input domain from the table values.

        Cells without a value in the model stay missing ("" for string
        metas, NaN otherwise).
        """
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Concatenate the input data and the one-row `data`, adding a
        "Source ID" meta that marks the origin of each row."""
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        data.metas[:len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return the current model values keyed by variable name."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        return QSize(600, 500)

    def pack_settings(self):
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWConcordance(OWWidget):
    """Widget that shows the occurrences of a query word with their context.

    The query word is either typed into the line edit or supplied through
    the "Query Word" input. Rows selected in the table determine the
    "Selected Documents" output; the "Concordances" output carries whatever
    ``ConcordanceModel.get_data()`` returns.
    """
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Build the info box, the width spinner, the query box and the table."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Push ``context_width`` to the model and re-apply the selection."""
        # Grab the selection first so it can be re-applied after the model
        # has been given the new width.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the selected row numbers (sorted, unique) and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(
            {cell.row() for cell in selection.indexes()})
        self.commit()

    def set_selection(self, selection):
        """Select the given rows in the view.

        Args:
            selection: iterable of row numbers; nothing happens when it is
                empty or None.
        """
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Handle the "Corpus" input.

        Args:
            data: the new corpus, or None when the input is removed.
        """
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # The typed word is only reset / restored from context when the word
        # is not dictated by the "Query Word" input.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Handle the "Query Word" input.

        Only the value at ``topic.metas[0, 0]`` is used as the query word; a
        warning is shown when the topic has more than one row. The line edit
        is disabled while a word is supplied through the input.

        Args:
            topic: the incoming topic, or None when the input is removed.
        """
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current query word: reset selection, refresh, commit."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Re-apply the stored row selection to the view."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Set columns 0 and 2 to equal widths around column 1."""
        # setColumnWidth takes integer pixels. True division yields a float,
        # which PyQt rejects with TypeError on Python 3.10+ (no implicit
        # __int__ conversion any more), so truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the column layout balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Refresh the table geometry and the info-box attributes."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            # With an empty query word the matching count is reported as 0.
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected documents and the concordance data.

        The first element of ``model.word_index[row]`` is used as the index
        of the document in the corpus. None is sent on "Selected Documents"
        when no row is selected.
        """
        selected_docs = sorted(
            {self.model.word_index[row][0] for row in self.selected_rows})
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        """Add the query, the token/type/matching info and the table to the report."""
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
class ExtendedTableView(QWidget):
    """Two table views (genes and info) placed side by side in a splitter."""

    def __init__(self, *args, **kwargs):
        """Create both views and lay them out inside a horizontal splitter.

        The ``parent`` keyword argument, when given, is also kept as
        ``self.ow``; ``set_info_model`` reads filter settings from it.
        """
        super().__init__(*args, **kwargs)
        self.ow = kwargs.get('parent')

        # Models are created later through the set_*_model methods.
        self.genes_model = None
        self.info_model = None

        # Margin-free vertical layout that hosts the splitter.
        box = QVBoxLayout()
        box.setContentsMargins(0, 0, 0, 0)
        self.setLayout(box)

        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Horizontal)

        # Left pane: genes table.
        self.genes_view = QTableView()
        genes_header = self.genes_view.horizontalHeader()
        genes_header.hide()
        self.genes_view.setItemDelegate(GeneItemDelegate())
        genes_header.setSectionResizeMode(QHeaderView.Stretch)

        # Right pane: info table.
        self.info_view = QTableView()
        self.info_view.setItemDelegate(HTMLDelegate())
        info_header = self.info_view.horizontalHeader()
        info_header.hide()
        info_header.setSectionResizeMode(QHeaderView.Stretch)

        for pane in (self.genes_view, self.info_view):
            self.splitter.addWidget(pane)

        box.addWidget(self.splitter)

    def set_genes_model(self, rows):
        """Create a fresh genes model and fill it with ``rows``."""
        model = GeneMatcherModel()
        self.genes_model = model
        model.add_rows(rows)

    def get_selected_gens(self):
        """Return the selected rows of the genes view (a list of QModelIndex)."""
        selection_model = self.genes_selection_model()
        return selection_model.selectedRows()

    def reset_genes_model(self):
        """Schedule the genes model for deletion and forget it, if one is set."""
        if not self.genes_model:
            return
        self.genes_model.deleteLater()
        self.genes_model = None

    def genes_selection_model(self):
        """Return the selection model of the genes view."""
        return self.genes_view.selectionModel()

    def reset_info_model(self):
        """Drop the info model and detach it from the info view, if one is set."""
        if not self.info_model:
            return
        self.info_model.deleteLater()
        self.info_model = None
        self.info_view.setModel(None)

    def set_info_model(self, rows):
        """Show ``rows`` in the info view when the first filter is selected.

        For any other selected filter the info model is reset instead.
        """
        # The unpacking requires the owner to define exactly three labels;
        # only the first position is compared against the selected filter.
        unique, _partial, _unknown = range(len(self.ow.filter_labels))

        if self.ow.selected_filter != unique:
            self.reset_info_model()
            return

        # Build an icon-less model, fill it and attach it to the view.
        self.info_model = GeneMatcherModel(show_icon=False)
        self.info_model.add_rows(rows)
        self.info_view.setModel(self.info_model)
        # Gene info cards are not selectable.
        self.info_view.setSelectionMode(QAbstractItemView.NoSelection)
        # Row heights follow the delegate's size hints.
        self.info_view.resizeRowsToContents()
class OWConcordance(OWWidget):
    """Widget that shows the occurrences of a query word with their context.

    This variant declares its signals through the ``inputs`` / ``outputs``
    lists and sends the documents of the selected rows on
    "Selected Documents".
    """
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 30000

    inputs = [
        ('Corpus', Table, 'set_corpus'),
        ('Query Word', Topic, 'set_word_from_input'),
    ]
    outputs = [('Selected Documents', Table, )]

    autocommit = Setting(True)
    context_width = Setting(5)
    word = Setting("")
    # TODO Set selection settings.

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Build the info box, the width spinner, the query box and the table."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_documents = ''   # Info on docs
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        # connect selectionChanged to self.commit(), which will be
        # updated by gui.auto_commit(); the lambda looks ``self.commit`` up
        # at call time, so it picks up whatever is bound there later.
        self.conc_view.selectionModel().selectionChanged.connect(
            lambda: self.commit())
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Push ``context_width`` to the model and re-apply the selection."""
        # Grab the selection first so it can be re-applied after the model
        # has been given the new width.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def set_corpus(self, data=None):
        """Handle the "Corpus" input.

        Args:
            data: a Corpus, any other table (converted with
                ``Corpus.from_table``), or None when the input is removed.
        """
        self.corpus = data
        if data is not None and not isinstance(data, Corpus):
            self.corpus = Corpus.from_table(data.domain, data)
        self.model.set_corpus(self.corpus)
        self.update_widget()
        self.commit()

    def set_word_from_input(self, topic):
        """Handle the "Query Word" input.

        Only the value at ``topic.metas[0, 0]`` is used as the query word; a
        warning is shown when the topic has more than one row. The line edit
        is disabled while a word is supplied through the input.

        Args:
            topic: the incoming topic, or None when the input is removed.
        """
        self.Warning.multiple_words_on_input.clear()
        have_word = topic is not None and len(topic) > 0
        self.input.setEnabled(not have_word)
        if have_word:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Pass the current query word to the model and refresh the widget."""
        self.model.set_word(self.word)
        self.update_widget()

    def resize_columns(self):
        """Set columns 0 and 2 to equal widths around column 1."""
        # setColumnWidth takes integer pixels. True division yields a float,
        # which PyQt rejects with TypeError on Python 3.10+ (no implicit
        # __int__ conversion any more), so truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the column layout balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Refresh the table geometry and the info-box attributes."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            # With an empty query word the matching count is reported as 0.
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                self.n_documents)
            # Token statistics are reported as 'n/a' when has_tokens()
            # is false for the corpus.
            has_tokens = self.corpus.has_tokens()
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if has_tokens else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if has_tokens else 'n/a'
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents that own the selected rows.

        The first element of ``model.word_index[row]`` is used as the index
        of the document in the corpus. None is sent when nothing is selected.
        """
        selection = self.conc_view.selectionModel().selection()
        # A selection range may cover several rows; visit every row in it,
        # not just the top one, so that no selected document is dropped.
        rows = [row
                for sel_range in selection
                for row in range(sel_range.top(), sel_range.bottom() + 1)]
        selected_docs = sorted({self.model.word_index[row][0] for row in rows})
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.send("Selected Documents", selected)
        else:
            self.send("Selected Documents", None)