def set_dataset(self, data, tid=None):
    """Attach, replace or remove the dataset shown for input slot ``tid``.

    When ``data`` is not ``None`` it is displayed in the table view that
    belongs to ``tid``; a fresh view is built the first time a ``tid`` is
    seen. When ``data`` is ``None`` and the slot exists, its view and tab are
    removed. In every case the settings context is re-opened afterwards, any
    pending row/column selection is applied, and the output is committed.
    """
    self.closeContext()

    if data is None:
        if tid in self._inputs:
            # The input was disconnected: drop its slot, view and tab.
            removed_slot = self._inputs.pop(tid)
            stale_view = removed_slot.view
            stale_view.hide()
            stale_view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(stale_view))

            active = self.tabs.currentWidget()
            if active is not None:
                self.set_info(active._input_slot.summary)
    else:
        if tid not in self._inputs:
            # First dataset for this slot: build and configure a new view.
            view = QTableView()
            view.setSortingEnabled(True)
            view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

            if self.select_rows:
                view.setSelectionBehavior(QTableView.SelectRows)

            hheader = view.horizontalHeader()
            hheader.setSectionsMovable(True)
            hheader.setSectionsClickable(True)
            hheader.setSortIndicatorShown(True)
            hheader.setSortIndicator(-1, Qt.AscendingOrder)

            # QHeaderView does not 'reset' the model sort column, because
            # there is no guarantee (requirement) that the models understand
            # the -1 sort column; forward that case to the model explicitly.
            def sort_reset(column, sort_order):
                if view.model() is not None and column == -1:
                    view.model().sort(column, sort_order)

            hheader.sortIndicatorChanged.connect(sort_reset)
        else:
            # Existing slot: reuse its view, resetting the (header) state.
            view = self._inputs[tid].view
            view.setModel(None)
            view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)

        view.dataset = data
        self.tabs.addTab(view, getattr(data, "name", "Data"))

        self._setup_table_view(view, data)
        input_slot = TableSlot(tid, data, table_summary(data), view)
        view._input_slot = input_slot
        self._inputs[tid] = input_slot

        self.tabs.setCurrentIndex(self.tabs.indexOf(view))
        self.set_info(input_slot.summary)

        summary_len = input_slot.summary.len
        if isinstance(summary_len, concurrent.futures.Future):
            # The length is still being computed; refresh the info once the
            # future completes, via a queued call to "_update_info".
            def on_len_ready(_future):
                QMetaObject.invokeMethod(
                    self, "_update_info", Qt.QueuedConnection)

            summary_len.add_done_callback(on_len_ready)

    self.tabs.tabBar().setVisible(self.tabs.count() > 1)
    self.openContext(data)

    # Apply selections that were stored before the data arrived; otherwise
    # start from an empty selection.
    if self.__pending_selected_rows is None:
        self.selected_rows = []
    else:
        self.selected_rows = self.__pending_selected_rows
        self.__pending_selected_rows = None

    if self.__pending_selected_cols is None:
        self.selected_cols = []
    else:
        self.selected_cols = self.__pending_selected_cols
        self.__pending_selected_cols = None

    self.set_selection()
    self.commit()
class OWGenialisExpressions(widget.OWWidget, ConcurrentWidgetMixin):
    """Widget that lists collections from a Resolwe server and outputs the
    expression data of the selected collection as a ``Table``.

    The collection list is fetched through ``self.res`` and shown in a table
    view. Selecting a row populates the output-option combo boxes and starts
    ``runner`` in the background (via ``ConcurrentWidgetMixin``), which
    downloads, annotates and normalizes the expression table.
    """

    name = 'Genialis Expressions'
    priority = 30
    want_main_area = True
    want_control_area = True
    icon = '../widgets/icons/OWGenialisExpressions.svg'

    # Emitted as (next_page_available, previous_page_available).
    pagination_availability = pyqtSignal(bool, bool)

    norm_component = settings.SettingProvider(NormalizationComponent)
    pagination_component = settings.SettingProvider(PaginationComponent)
    filter_component = settings.SettingProvider(CollapsibleFilterComponent)

    # Indices into the corresponding combo boxes / data output options.
    exp_type: int = settings.Setting(1, schema_only=True)
    proc_slug: int = settings.Setting(0, schema_only=True)
    exp_source: int = settings.Setting(0, schema_only=True)

    append_qc_data: bool = settings.Setting(False, schema_only=True)
    auto_commit: bool = settings.Setting(False, schema_only=True)

    class Outputs:
        table = Output('Expressions', Table)

    class Warning(widget.OWWidget.Warning):
        no_expressions = Msg('Expression data objects not found.')
        no_data_objects = Msg(
            'No expression data matches the selected options.')
        unexpected_feature_type = Msg(
            'Can not import expression data, unexpected feature type "{}".')
        multiple_feature_type = Msg(
            'Can not import expression data, multiple feature types found.')

    def __init__(self):
        """Build the control and main areas, then sign in silently."""
        super().__init__()
        ConcurrentWidgetMixin.__init__(self)

        self._res: Optional[resolwe.resapi.ResolweAPI] = None

        # Store collection ID from currently selected row
        self.selected_collection_id: Optional[str] = None
        # Store data output options
        self.data_output_options: Optional[DataOutputOptions] = None
        # Cache output data table
        self.data_table: Optional[Table] = None
        # Cache clinical metadata
        self.clinical_metadata: Optional[Table] = None

        # Control area
        self.info_box = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", margin=3),
            'No data on output.')

        self.exp_type_combo = gui.comboBox(
            self.controlArea, self, 'exp_type', label='Expression Type',
            callback=self.on_output_option_changed)
        self.proc_slug_combo = gui.comboBox(
            self.controlArea, self, 'proc_slug', label='Process Name',
            callback=self.on_output_option_changed)
        self.exp_source_combo = gui.comboBox(
            self.controlArea,
            self,
            'exp_source',
            label='Expression source',
            callback=self.on_output_option_changed,
        )

        self.norm_component = NormalizationComponent(self, self.controlArea)
        self.norm_component.options_changed.connect(
            self.on_normalization_changed)

        box = gui.widgetBox(self.controlArea, 'Sample QC')
        gui.checkBox(box, self, 'append_qc_data', 'Append QC data',
                     callback=self.on_output_option_changed)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Sign in')
        self.user_info = gui.label(box, self, '')
        self.server_info = gui.label(box, self, '')

        box = gui.widgetBox(box, orientation=Qt.Horizontal)
        self.sign_in_btn = gui.button(box, self, 'Sign In',
                                      callback=self.sign_in,
                                      autoDefault=False)
        self.sign_out_btn = gui.button(box, self, 'Sign Out',
                                       callback=self.sign_out,
                                       autoDefault=False)

        self.commit_button = gui.auto_commit(self.controlArea, self,
                                             'auto_commit', '&Commit',
                                             box=False)
        self.commit_button.button.setAutoDefault(False)

        # Main area
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        self.table_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)

        self.model = GenialisExpressionsModel(self)
        self.model.setHorizontalHeaderLabels(TableHeader.labels())
        self.table_view.setModel(self.model)
        self.table_view.selectionModel().selectionChanged.connect(
            self.on_selection_changed)

        self.filter_component = CollapsibleFilterComponent(self,
                                                           self.mainArea)
        self.filter_component.options_changed.connect(self.on_filter_changed)
        self.mainArea.layout().addWidget(self.table_view)
        self.pagination_component = PaginationComponent(self, self.mainArea)
        self.pagination_component.options_changed.connect(
            self.update_collections_view)

        self.sign_in(silent=True)

    @property
    def res(self):
        """The current Resolwe API connection (``None`` until signed in)."""
        return self._res

    @res.setter
    def res(self, value: resolwe.resapi.ResolweAPI):
        # Values that are not ResolweAPI instances are silently ignored.
        if isinstance(value, resolwe.resapi.ResolweAPI):
            self._res = value
            self.update_user_status()
            self.update_collections_view()
            self.__invalidate()
            self.Outputs.table.send(None)

    def __invalidate(self):
        """Drop cached data and options, clear warnings and the output."""
        self.data_table = None
        self.selected_collection_id = None
        self.clinical_metadata = None
        self.data_output_options = None

        self.exp_type_combo.clear()
        self.proc_slug_combo.clear()
        self.exp_source_combo.clear()

        self.Outputs.table.send(None)
        self.Warning.no_expressions.clear()
        self.Warning.multiple_feature_type.clear()
        self.Warning.unexpected_feature_type.clear()
        self.Warning.no_data_objects.clear()
        self.info.set_output_summary(StateInfo.NoOutput)
        self.update_info_box()

    def update_user_status(self):
        """Refresh the user/server labels and the sign in/out buttons."""
        user = self.res.get_currently_logged_user()

        if user:
            profile = user[0]
            full_name = (f"{profile.get('first_name', '')} "
                         f"{profile.get('last_name', '')}").strip()
            # Fall back to the username when no first/last name is set.
            shown_name = full_name if full_name else profile.get(
                'username', '')
            user_info = f"User: {shown_name}"
            self.sign_in_btn.setEnabled(False)
            self.sign_out_btn.setEnabled(True)
        else:
            user_info = 'User: Anonymous'
            self.sign_in_btn.setEnabled(True)
            self.sign_out_btn.setEnabled(False)

        self.user_info.setText(user_info)
        # NOTE(review): the [8:] slice presumably strips an 'https://'
        # prefix from the URL -- confirm the URL always uses that scheme.
        self.server_info.setText(f'Server: {self.res.url[8:]}')

    def update_info_box(self):
        """Show gene counts for the cached table, or a 'no data' note."""
        if self.data_table:
            attributes = self.data_table.domain.attributes
            total_genes = len(attributes)
            # A gene counts as known when its variable carries attributes.
            known_genes = sum(1 for col in attributes
                              if len(col.attributes))

            info_text = ('{} genes on output\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             total_genes, known_genes,
                             total_genes - known_genes))
        else:
            info_text = 'No data on output.'

        self.info_box.setText(info_text)

    def sign_in(self, silent=False):
        """Sign in to the server.

        With ``silent=True`` the dialog's ``sign_in`` is invoked without
        showing it, and a connection to the default URL is used when that
        yields no instance. Otherwise the dialog is executed and its
        instance is used if the dialog was accepted.
        """
        dialog = SignIn(self, server_type=resolwe.RESOLWE_PLATFORM)

        if silent:
            dialog.sign_in()
            if dialog.resolwe_instance is not None:
                self.res = dialog.resolwe_instance
            else:
                self.res = resolwe.connect(
                    url=resolwe.resapi.DEFAULT_URL,
                    server_type=resolwe.RESOLWE_PLATFORM)

        if not silent and dialog.exec_():
            self.res = dialog.resolwe_instance

    def sign_out(self):
        """Switch to the default connection and forget stored credentials."""
        # Use public credentials when user signs out
        self.res = resolwe.connect(url=resolwe.resapi.DEFAULT_URL,
                                   server_type=resolwe.RESOLWE_PLATFORM)
        # Remove username and password
        cm = get_credential_manager(resolwe.RESOLWE_PLATFORM)
        if cm.username:
            del cm.username
        if cm.password:
            del cm.password

    def on_filter_changed(self):
        """Restart pagination and reload the list after a filter change."""
        self.pagination_component.reset_pagination()
        self.update_collections_view()

    def get_query_parameters(self) -> Dict[str, str]:
        """Build the collection query from pagination and filter settings.

        Optional filters are only included when they are set.
        """
        params = {
            'limit':
            ItemsPerPage.values()[self.pagination_component.items_per_page],
            'offset':
            self.pagination_component.offset,
            'ordering':
            SortBy.values()[self.filter_component.sort_by],
        }

        if self.filter_component.filter_by_full_text:
            params.update({'text': self.filter_component.filter_by_full_text})

        if self.filter_component.filter_by_name:
            params.update(
                {'name__icontains': self.filter_component.filter_by_name})

        if self.filter_component.filter_by_contrib:
            params.update(
                {'contributor_name': self.filter_component.filter_by_contrib})

        if self.filter_component.filter_by_owner:
            params.update(
                {'owners_name': self.filter_component.filter_by_owner})

        last_modified = FilterByDateModified.values()[
            self.filter_component.filter_by_modified]
        if last_modified:
            params.update({'modified__gte': last_modified.isoformat()})

        return params

    def get_collections(self) -> Tuple[Dict[str, str], Dict[str, str]]:
        """Return ``(collections_response, collection_to_species)``."""
        # Get response from the server
        collections = self.res.get_collections(**self.get_query_parameters())
        # Loop through collections and store ids
        collection_ids = [
            collection['id'] for collection in collections.get('results', [])
        ]
        # Get species by collection ids
        collection_to_species = self.res.get_species(collection_ids)

        return collections, collection_to_species

    def update_collections_view(self):
        """Reload the collection list and announce pagination availability."""
        collections, collection_to_species = self.get_collections()

        # Pass the results to data model
        self.model.set_data(collections.get('results', []),
                            collection_to_species)
        self.table_view.setItemDelegateForColumn(
            TableHeader.id, gui.LinkStyledItemDelegate(self.table_view))
        self.table_view.setColumnHidden(TableHeader.slug, True)
        self.table_view.setColumnHidden(TableHeader.tags, True)

        # Check pagination parameters and emit pagination_availability signal
        next_page = bool(collections.get('next'))
        previous_page = bool(collections.get('previous'))
        self.pagination_availability.emit(next_page, previous_page)

    def normalize(self, table: Table) -> Optional[Table]:
        """Apply the normalization steps enabled in ``norm_component``.

        Steps run in a fixed order: quantile normalization, logarithmic
        scale, z-score, quantile transform. Returns ``None`` for a falsy
        ``table``.
        """
        if not table:
            return None

        if self.norm_component.quantile_norm:
            table = QuantileNormalization()(table)

        if self.norm_component.log_norm:
            table = LogarithmicScale()(table)

        if self.norm_component.z_score_norm:
            table = ZScore(axis=self.norm_component.z_score_axis)(table)

        if self.norm_component.quantile_transform:
            axis = self.norm_component.quantile_transform_axis
            # Number of quantiles = size of the dimension opposite to axis.
            quantiles = table.X.shape[int(not axis)]
            distribution = QuantileTransformDist.values()[
                self.norm_component.quantile_transform_dist]
            table = QuantileTransform(axis=axis,
                                      n_quantiles=quantiles,
                                      output_distribution=distribution)(table)

        return table

    def commit(self):
        """Cancel any running task and start ``runner`` anew."""
        self.Warning.no_data_objects.clear()
        self.cancel()
        self.start(self.runner)

    def on_output_option_changed(self):
        """Discard the cached table and recompute the output."""
        self.data_table = None
        self.commit()

    def on_clinical_data_changed(self):
        # NOTE(review): fetch_clinical_metadata is not defined in this
        # class as visible here -- confirm it is provided elsewhere.
        self.clinical_metadata = self.fetch_clinical_metadata()
        self.commit()

    def on_normalization_changed(self):
        """Recompute the output; the cached table is kept."""
        self.commit()

    def on_selection_changed(self):
        """React to a new row selection in the collections table.

        Resets the widget state, fills the output-option combo boxes for the
        selected collection (resetting out-of-range stored indices to 0),
        validates the feature types and finally triggers a commit.
        """
        self.__invalidate()

        collection_id: str = self.get_selected_row_data(TableHeader.id)
        if not collection_id:
            return

        self.selected_collection_id = collection_id
        data_objects = self.res.get_expression_data_objects(collection_id)
        self.data_output_options = available_data_output_options(data_objects)

        self.exp_type_combo.addItems(
            exp_name
            for _, exp_name in self.data_output_options.expression_type)
        if self.exp_type >= len(self.data_output_options.expression_type):
            self.exp_type = 0
        self.exp_type_combo.setCurrentIndex(self.exp_type)

        self.proc_slug_combo.addItems(
            proc_name for _, proc_name in self.data_output_options.process)
        if self.proc_slug >= len(self.data_output_options.process):
            self.proc_slug = 0
        self.proc_slug_combo.setCurrentIndex(self.proc_slug)

        self.exp_source_combo.addItems(
            self.data_output_options.expression_sources)
        if self.exp_source >= len(self.data_output_options.expression_sources):
            self.exp_source = 0
        self.exp_source_combo.setCurrentIndex(self.exp_source)

        if not data_objects:
            self.Warning.no_expressions()
            return

        # Note: This here is to handle an edge case where we get
        #       different 'feature_type' data object in a collection.
        #       For now we raise a warning, but in the future we should
        #       discuss about how to properly handle different types of
        #       features.
        feature_types = {data.output['feature_type'] for data in data_objects}

        if len(feature_types) == 1 and 'gene' not in feature_types:
            self.Warning.unexpected_feature_type(feature_types.pop())
            return

        if len(feature_types) > 1:
            self.Warning.multiple_feature_type()
            return

        self.on_output_option_changed()

    def get_selected_row_data(self, column: int) -> Optional[str]:
        """Return the ``column`` value of the first selected row, if any."""
        selection_model = self.table_view.selectionModel()
        rows = selection_model.selectedRows(column=column)
        if not rows:
            return None

        return rows[0].data()

    def on_done(self, table: Table):
        """Publish the finished table; a falsy result is ignored."""
        if table:
            samples, genes = table.X.shape
            self.info.set_output_summary(f'Samples: {samples} Genes: {genes}')
            self.update_info_box()
            self.Outputs.table.send(table)

    def on_exception(self, ex):
        # if isinstance(ex, ResolweDataObjectsNotFound):
        #     self.Warning.no_data_objects()
        #     self.Outputs.table.send(None)
        #     self.data_table = None
        #     self.info.set_output_summary(StateInfo.NoOutput)
        #     self.update_info_box()
        # else:
        raise ex

    def on_partial_result(self, result: Any) -> None:
        pass

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self):
        return QSize(1280, 620)

    def runner(self, state: TaskState) -> Table:
        """Background task that produces the output table.

        When no table is cached in ``self.data_table`` the expression and
        meta data of the selected collection are downloaded, converted to a
        ``Table``, matched against the gene database and cached. The
        (cached or fresh) table is then normalized and returned.

        Raises ``Exception`` from the progress callback when an interruption
        of the task was requested.
        """
        exp_type = self.data_output_options.expression_type[self.exp_type].type
        exp_source = self.data_output_options.expression_sources[
            self.exp_source]
        proc_slug = self.data_output_options.process[self.proc_slug].slug
        collection_id = self.selected_collection_id

        table = self.data_table

        def callback(i: float, status=""):
            state.set_progress_value(i * 100)
            if status:
                state.set_status(status)
            if state.is_interruption_requested():
                raise Exception

        if not table:
            collection = self.res.get_collection_by_id(collection_id)
            coll_table = resdk.tables.RNATables(
                collection,
                expression_source=exp_source,
                expression_process_slug=proc_slug,
                progress_callable=wrap_callback(callback, end=0.5),
            )
            species = coll_table._data[0].output['species']
            sample = coll_table._samples[0]

            state.set_status('Downloading ...')
            # NOTE(review): presumably the RNATables accessors below need a
            # current event loop in this worker thread -- confirm.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                df_exp = coll_table.exp if exp_type != 'rc' else coll_table.rc
                df_exp = df_exp.rename(index=coll_table.readable_index)
                df_metas = coll_table.meta
                df_metas = df_metas.rename(index=coll_table.readable_index)

                df_qc = None
                if self.append_qc_data:
                    # TODO: check if there is a way to detect if collection
                    #       table contains QC data
                    try:
                        df_qc = coll_table.qc
                        df_qc = df_qc.rename(index=coll_table.readable_index)
                    except ValueError:
                        # Best effort: continue without QC columns.
                        pass
            finally:
                # Close the loop even when a download fails or the task is
                # interrupted, so it is not leaked.
                loop.close()

            state.set_status('To data table ...')

            # Field names that occur in more than one section.
            duplicates = {
                item
                for item, count in Counter([
                    label.split('.')[1]
                    for label in df_metas.columns.to_list() if '.' in label
                ]).items() if count > 1
            }

            # what happens if there is more nested sections?
            section_name_to_label = {
                section['name']: section['label']
                for section in sample.descriptor_schema.schema
            }

            column_labels = {}
            for field_schema, fields, path in iterate_schema(
                    sample.descriptor, sample.descriptor_schema.schema,
                    path=''):
                path = path[1:]  # this is ugly, but cant go around it
                if path not in df_metas.columns:
                    continue
                label = field_schema['label']
                section_name, field_name = path.split('.')
                # Prefix duplicated field names with their section label.
                column_labels[path] = (
                    label if field_name not in duplicates else
                    f'{section_name_to_label[section_name]} - {label}')

            df_exp = df_exp.reset_index(drop=True)
            df_metas = df_metas.astype('object')
            df_metas = df_metas.fillna(np.nan)
            df_metas = df_metas.replace('nan', np.nan)
            df_metas = df_metas.rename(columns=column_labels)
            if df_qc is not None:
                df_metas = pd.merge(df_metas, df_qc, left_index=True,
                                    right_index=True)

            xym, domain_metas = vars_from_df(df_metas)
            x, _, m = xym
            x_metas = np.hstack((x, m))
            attrs = [ContinuousVariable(col) for col in df_exp.columns]
            metas = domain_metas.attributes + domain_metas.metas
            domain = Domain(attrs, metas=metas)
            table = Table(domain, df_exp.to_numpy(), metas=x_metas)
            # The download callback covers 0-50; continue from 50 rather
            # than resetting the bar to 0.
            state.set_progress_value(50)

            state.set_status('Matching genes ...')
            progress_steps_gm = iter(
                np.linspace(50, 99, len(coll_table.gene_ids)))

            def gm_callback():
                # Stay at 99 if called more often than there are steps.
                state.set_progress_value(next(progress_steps_gm, 99))

            tax_id = species_name_to_taxid(species)
            gm = GeneMatcher(tax_id, progress_callback=gm_callback)
            table = gm.match_table_attributes(table, rename=True)
            table.attributes[TableAnnotation.tax_id] = tax_id
            table.attributes[TableAnnotation.gene_as_attr_name] = True
            table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
            self.data_table = table

        state.set_status('Normalizing ...')
        table = self.normalize(table)
        state.set_progress_value(100)

        return table
class OWClusterAnalysis(OWWidget): name = "Cluster Analysis" description = "The widget displays differentially expressed genes that characterize the cluster, " \ "and corresponding gene terms that describe differentially expressed genes" icon = "../widgets/icons/OWClusterAnalysis.svg" priority = 100 class Inputs: data_table = Input('Data', Table) custom_sets = Input('Custom Gene Sets', Table) class Outputs: selected_data = Output('Selected Data', Table) gene_scores = Output('Gene Scores', Table) gene_set_scores = Output('Gene Set Scores', Table) class Information(OWWidget.Information): pass class Warning(OWWidget.Warning): gene_enrichment = Msg('{}, {}.') no_selected_gene_sets = Msg( 'No gene set selected, select them from Gene Sets box.') class Error(OWWidget.Error): no_cluster_indicator = Msg('No cluster indicator in the input data') gene_as_attributes = Msg( 'Genes, in the input data, are expected as column names') organism_mismatch = Msg( 'Organism in input data and custom gene sets does not match') cluster_batch_conflict = Msg( 'Cluster and batch must not be the same variable') settingsHandler = ClusterAnalysisContextHandler() cluster_indicators = ContextSetting([]) batch_indicator = ContextSetting(None) stored_gene_sets_selection = ContextSetting(tuple()) scoring_method_selection = ContextSetting(0) scoring_method_design = ContextSetting(0) scoring_test_type = ContextSetting(0) # genes filter max_gene_count = Setting(20) use_gene_count_filter = Setting(True) max_gene_p_value = Setting(0.1) use_gene_pval_filter = Setting(False) max_gene_fdr = Setting(0.1) use_gene_fdr_filter = Setting(True) # gene sets filter min_gs_count = Setting(5) use_gs_count_filter = Setting(True) max_gs_p_value = Setting(0.1) use_gs_pval_filter = Setting(False) max_gs_fdr = Setting(0.1) use_gs_max_fdr = Setting(True) # auto commit results auto_commit = settings.Setting(False) custom_gene_set_indicator = settings.Setting(None) def __init__(self): super().__init__() # widget attributes 
self.input_data = None self.store_input_domain = None self.input_genes_names = [] self.input_genes_ids = [] self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None # custom gene set input self.feature_model = itemmodels.DomainModel( valid_types=(DiscreteVariable, StringVariable)) self.custom_data = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.num_of_custom_sets = None self.rows_by_cluster = None self.rows_by_batch = None self.clusters = [] self.new_cluster_profile = [] # data model self.cluster_info_model = None # Info info_box = vBox(self.controlArea, 'Info') self.input_info = widgetLabel(info_box) # Cluster selection self.cluster_indicator_model = itemmodels.DomainModel( valid_types=(DiscreteVariable, ), separators=False) self.cluster_indicator_box = widgetBox(self.controlArea, 'Cluster Indicator') self.cluster_indicator_view = listView( self.cluster_indicator_box, self, 'cluster_indicators', model=self.cluster_indicator_model, selectionMode=QListWidget.MultiSelection, callback=self.invalidate, sizeHint=QSize(256, 70)) # Batch selection self.batch_indicator_model = itemmodels.DomainModel( valid_types=(DiscreteVariable, ), separators=False, placeholder="") box = widgetBox(self.controlArea, 'Batch Indicator') self.batch_indicator_combobox = comboBox( box, self, 'batch_indicator', model=self.batch_indicator_model, sendSelectedValue=True, callback=self.batch_indicator_changed) # Gene scoring box = widgetBox(self.controlArea, 'Gene Scoring') self.gene_scoring = GeneScoringWidget(box, self) self.gene_scoring.set_method_selection_area('scoring_method_selection') self.gene_scoring.set_method_design_area('scoring_method_design') self.gene_scoring.set_test_type('scoring_test_type') # Gene Sets widget gene_sets_box = widgetBox(self.controlArea, "Gene Sets") self.gs_widget = GeneSetsSelection(gene_sets_box, self, 'stored_gene_sets_selection') 
self.gs_widget.hierarchy_tree_widget.itemClicked.connect( self.__gene_sets_enrichment) # custom gene sets area box = vBox(self.controlArea, "Custom Gene Sets") if self.custom_gene_set_indicator not in self.feature_model: self.custom_gene_set_indicator = None self.gs_label_combobox = comboBox( box, self, "custom_gene_set_indicator", sendSelectedValue=True, model=self.feature_model, callback=self.handle_custom_gene_sets) self.gs_label_combobox.setDisabled(True) # main area splitter = QSplitter(Qt.Horizontal, self.mainArea) self.mainArea.layout().addWidget(splitter) genes_filter = widgetBox(splitter, 'Filter Genes', orientation=QHBoxLayout()) spin(genes_filter, self, 'max_gene_count', 0, 10000, label='Count', tooltip='Minimum genes count', checked='use_gene_count_filter', callback=self.filter_genes, callbackOnReturn=True, checkCallback=self.filter_genes) doubleSpin(genes_filter, self, 'max_gene_p_value', 0.0, 1.0, 0.0001, label='p-value', tooltip='Maximum p-value of the enrichment score', checked='use_gene_pval_filter', callback=self.filter_genes, callbackOnReturn=True, checkCallback=self.filter_genes) doubleSpin(genes_filter, self, 'max_gene_fdr', 0.0, 1.0, 0.0001, label='FDR', tooltip='Maximum false discovery rate', checked='use_gene_fdr_filter', callback=self.filter_genes, callbackOnReturn=True, checkCallback=self.filter_genes) gene_sets_filter = widgetBox(splitter, 'Filter Gene Sets', orientation=QHBoxLayout()) spin(gene_sets_filter, self, 'min_gs_count', 0, DISPLAY_GENE_SETS_COUNT, label='Count', tooltip='Minimum genes count', checked='use_gs_count_filter', callback=self.filter_gene_sets, callbackOnReturn=True, checkCallback=self.filter_gene_sets) doubleSpin(gene_sets_filter, self, 'max_gs_p_value', 0.0, 1.0, 0.0001, label='p-value', tooltip='Maximum p-value of the enrichment score', checked='use_gs_pval_filter', callback=self.filter_gene_sets, callbackOnReturn=True, checkCallback=self.filter_gene_sets) doubleSpin(gene_sets_filter, self, 'max_gs_fdr', 0.0, 1.0, 
0.0001, label='FDR', tooltip='Maximum false discovery rate', checked='use_gs_max_fdr', callback=self.filter_gene_sets, callbackOnReturn=True, checkCallback=self.filter_gene_sets) self.cluster_info_view = QTableView() self.cluster_info_view.verticalHeader().setVisible(False) self.cluster_info_view.setItemDelegate(HTMLDelegate()) self.cluster_info_view.horizontalHeader().hide() self.cluster_info_view.horizontalHeader().setSectionResizeMode( QHeaderView.Stretch) auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) self.mainArea.layout().addWidget(self.cluster_info_view) def sizeHint(self): return QSize(800, 600) def __update_info_box(self): info_string = '' if self.input_genes_ids: info_string += '{} samples, {} clusters\n'.format( self.input_data.X.shape[0], len(self.clusters) if self.clusters else '?') info_string += '{:,d} unique genes\n'.format( len(self.input_genes_ids)) else: info_string += 'No genes on input.\n' if self.custom_data: info_string += '{} marker genes in {} sets\n'.format( self.custom_data.X.shape[0], self.num_of_custom_sets) self.input_info.setText(info_string) def __set_cluster_info_model(self): self.cluster_info_view.setModel(None) self.cluster_info_model = ClusterModel(self) self.cluster_info_model.add_rows(self.clusters) # add model to the view self.cluster_info_view.setModel(self.cluster_info_model) # call sizeHint function self.cluster_info_view.resizeRowsToContents() self.cluster_info_view.selectionModel().selectionChanged.connect( self.commit) def __create_temp_class_var(self): """ See no evil !""" cluster_indicator_name = 'Cluster indicators' var_index_lookup = dict([(val, idx) for var in self.cluster_indicators for idx, val in enumerate(var.values)]) row_profile = None new_cluster_values = [] cart_prod = itertools.product( *[cluster.values for cluster in self.cluster_indicators]) for comb in cart_prod: new_cluster_values.append(', '.join([val for val in comb])) self.new_cluster_profile.append( [var_index_lookup[val] 
for val in comb]) row_profile_lookup = dict([(tuple(profile), indx) for indx, ( profile, _) in enumerate(zip(self.new_cluster_profile, new_cluster_values)) ]) for var in self.cluster_indicators: if row_profile is None: row_profile = np.asarray( self.input_data.get_column_view(var)[0], dtype=int) else: row_profile = np.vstack( (row_profile, np.asarray(self.input_data.get_column_view(var)[0], dtype=int))) ca_ind = DiscreteVariable.make( cluster_indicator_name, values=[val for val in new_cluster_values], ordered=True) domain = Domain(self.input_data.domain.attributes, self.input_data.domain.class_vars, self.input_data.domain.metas + (ca_ind, )) table = self.input_data.transform(domain) table[:, ca_ind] = np.array( [[row_profile_lookup[tuple(row_profile[:, i])]] for i in range(row_profile.shape[1])]) self.input_data = table return ca_ind def __set_clusters(self): self.clusters = [] self.new_cluster_profile = [] self.cluster_var = None if self.cluster_indicators and self.input_data: if isinstance(self.cluster_indicators, list) and len(self.cluster_indicators) > 1: self.cluster_var = self.__create_temp_class_var() else: self.cluster_var = self.cluster_indicators[0] self.rows_by_cluster = np.asarray(self.input_data.get_column_view( self.cluster_var)[0], dtype=int) for index, name in enumerate(self.cluster_var.values): cluster = Cluster(name, index) self.clusters.append(cluster) cluster.set_genes(self.input_genes_names, self.input_genes_ids) def __set_batch(self): self.Error.cluster_batch_conflict.clear() self.rows_by_batch = None if self.batch_indicator == self.cluster_var: self.Error.cluster_batch_conflict() return if self.batch_indicator and self.input_data: self.rows_by_batch = np.asarray(self.input_data.get_column_view( self.batch_indicator)[0], dtype=int) def __set_genes(self): self.input_genes_names = [] self.input_genes_ids = [] if self.use_attr_names: for variable in self.input_data.domain.attributes: self.input_genes_names.append(str(variable.name)) 
self.input_genes_ids.append( str(variable.attributes.get(self.gene_id_attribute, np.nan))) def filter_genes(self): if self.cluster_info_model: # filter genes # note: after gene filter is applied, we need to recalculate gene set enrichment self.cluster_info_model.apply_gene_filters( self.max_gene_p_value if self.use_gene_pval_filter else None, self.max_gene_fdr if self.use_gene_fdr_filter else None, self.max_gene_count if self.use_gene_count_filter else None) # recalculate gene set enrichment self.__gene_sets_enrichment() # call sizeHint function self.cluster_info_view.resizeRowsToContents() # commit changes after filter self.commit() def filter_gene_sets(self): if self.cluster_info_model: # filter gene sets self.cluster_info_model.apply_gene_sets_filters( self.max_gs_p_value if self.use_gs_pval_filter else None, self.max_gs_fdr if self.use_gs_max_fdr else None, self.min_gs_count if self.use_gs_count_filter else None) # call sizeHint function self.cluster_info_view.resizeRowsToContents() def __gene_enrichment(self): design = bool(self.gene_scoring.get_selected_desig() ) # if true cluster vs. 
cluster else cluster vs rest test_type = self.gene_scoring.get_selected_test_type() method = self.gene_scoring.get_selected_method() try: if method.score_function == score_hypergeometric_test: values = set(np.unique(self.input_data.X)) if (0 not in values) or (len(values) != 2): raise ValueError('Binary data expected (use Preprocess)') self.cluster_info_model.score_genes( design=design, table_x=self.input_data.X, rows_by_cluster=self.rows_by_cluster, rows_by_batch=self.rows_by_batch, method=method, alternative=test_type) except ValueError as e: self.Warning.gene_enrichment(str(e), 'p-values are set to 1') def __gene_sets_enrichment(self): if self.input_data: self.Warning.no_selected_gene_sets.clear() all_sets = self.gs_widget.get_hierarchies() selected_sets = self.gs_widget.get_hierarchies(only_selected=True) if len(selected_sets) == 0 and len(all_sets) > 0: self.Warning.no_selected_gene_sets() # save setting on selected hierarchies self.stored_gene_sets_selection = tuple(selected_sets) ref_genes = set(self.input_genes_ids) try: self.cluster_info_model.gene_sets_enrichment( self.gs_widget.gs_object, selected_sets, ref_genes) except Exception as e: # TODO: possible exceptions? raise e self.filter_gene_sets() def invalidate(self, cluster_init=True): if self.input_data is not None and self.tax_id is not None: self.Warning.gene_enrichment.clear() if self.cluster_info_model is not None: self.cluster_info_model.cancel() self.__set_genes() if cluster_init: self.__set_clusters() self.__set_batch() self.__set_cluster_info_model() # note: when calling self.__gene_enrichment we calculate gse automatically. 
# No need to call self.__gene_sets_enrichment here self.__gene_enrichment() self.__update_info_box() def batch_indicator_changed(self): self.invalidate(cluster_init=False) @Inputs.data_table def handle_input(self, data): self.closeContext() self.Warning.clear() self.Error.clear() self.input_data = None self.store_input_domain = None self.stored_gene_sets_selection = tuple() self.input_genes_names = [] self.input_genes_ids = [] self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.clusters = None self.gs_widget.clear() self.gs_widget.clear_gene_sets() self.cluster_info_view.setModel(None) self.cluster_indicators = [] self.cluster_var = None self.batch_indicator = None self.cluster_indicator_model.set_domain(None) self.batch_indicator_model.set_domain(None) self.__update_info_box() if data: self.input_data = data self.cluster_indicator_model.set_domain(self.input_data.domain) self.batch_indicator_model.set_domain(self.input_data.domain) # For Cluster Indicator do not use categorical variables that contain only one value. 
self.cluster_indicator_model.wrap([ item for item in self.cluster_indicator_model if len(item.values) > 1 ]) # First value in batch indicator model is a NoneType, # we can skip it when we validate categorical variables self.batch_indicator_model.wrap(self.batch_indicator_model[:1] + [ item for item in self.batch_indicator_model[1:] if len(item.values) > 1 ]) self.tax_id = self.input_data.attributes.get(TAX_ID, None) self.use_attr_names = self.input_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.gene_id_attribute = self.input_data.attributes.get( GENE_ID_ATTRIBUTE, None) if not self.cluster_indicator_model: self.Error.no_cluster_indicator() return elif not self.use_attr_names: self.Error.gene_as_attributes() return self.openContext(self.input_data.domain) self.gs_widget.load_gene_sets(self.tax_id) if self.cluster_indicator_model and len( self.cluster_indicators) < 1: self.cluster_indicators = [self.cluster_indicator_model[0]] if self.batch_indicator_model and self.batch_indicator is None: self.batch_indicator = self.batch_indicator_model[0] self.invalidate() if self.custom_data: self.refresh_custom_gene_sets() self._handle_future_model() self.handle_custom_gene_sets() @Inputs.custom_sets def handle_custom_input(self, data): self.Error.clear() self.Warning.clear() self.closeContext() self.custom_data = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.num_of_custom_sets = None self.feature_model.set_domain(None) if data: self.custom_data = data self.feature_model.set_domain(self.custom_data.domain) self.custom_tax_id = str( self.custom_data.attributes.get(TAX_ID, None)) self.custom_use_attr_names = self.custom_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.custom_gene_id_attribute = self.custom_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.custom_gene_id_column = self.custom_data.attributes.get( GENE_ID_COLUMN, None) self._handle_future_model() if 
self.input_data: self.openContext(self.input_data.domain) self.gs_label_combobox.setDisabled(True) self.refresh_custom_gene_sets() self.handle_custom_gene_sets(select_customs_flag=True) def __check_organism_mismatch(self): """ Check if organisms from different inputs match. :return: True if there is a mismatch """ if self.tax_id is not None and self.custom_tax_id is not None: return self.tax_id != self.custom_tax_id return False def _handle_future_model(self): if self.custom_gene_set_indicator in self.feature_model: index = self.feature_model.indexOf(self.custom_gene_set_indicator) self.custom_gene_set_indicator = self.feature_model[index] else: if self.feature_model: self.custom_gene_set_indicator = self.feature_model[0] else: self.custom_gene_set_indicator = None def handle_custom_gene_sets(self, select_customs_flag=False): if self.custom_gene_set_indicator: if self.custom_data is not None and self.custom_gene_id_column is not None: if self.__check_organism_mismatch(): self.gs_label_combobox.setDisabled(True) self.Error.organism_mismatch() self.gs_widget.update_gs_hierarchy() self.__gene_sets_enrichment() return if isinstance(self.custom_gene_set_indicator, DiscreteVariable): labels = self.custom_gene_set_indicator.values gene_sets_names = [ labels[int(idx)] for idx in self.custom_data.get_column_view( self.custom_gene_set_indicator)[0] ] else: gene_sets_names, _ = self.custom_data.get_column_view( self.custom_gene_set_indicator) self.num_of_custom_sets = len(set(gene_sets_names)) gene_names, _ = self.custom_data.get_column_view( self.custom_gene_id_column) hierarchy_title = (self.custom_data.name if self.custom_data.name else 'Custom sets', ) try: self.gs_widget.add_custom_sets( gene_sets_names, gene_names, hierarchy_title=hierarchy_title, select_customs_flag=select_customs_flag) except GeneSetException: pass self.gs_label_combobox.setDisabled(False) else: self.gs_widget.update_gs_hierarchy() self.__gene_sets_enrichment() self.__update_info_box() def 
refresh_custom_gene_sets(self): self.gs_widget.clear_custom_sets() # self.gs_widget.update_gs_hierarchy() def gene_scores_output(self, selected_clusters): metas = [ StringVariable('Gene'), StringVariable(NCBI_ID), StringVariable('Rank'), ContinuousVariable('Statistic score'), ContinuousVariable('P-value'), ContinuousVariable('FDR') ] if len(self.new_cluster_profile): # note: order is important metas = self.cluster_indicators + metas domain = Domain([], metas=metas, class_vars=self.cluster_var) data = [] for cluster in selected_clusters: num_of_genes = len(cluster.filtered_genes) scores = [gene.score for gene in cluster.filtered_genes] p_vals = [gene.p_val for gene in cluster.filtered_genes] fdr_vals = [gene.fdr for gene in cluster.filtered_genes] gene_names = [gene.input_name for gene in cluster.filtered_genes] gene_ids = [gene.ncbi_id for gene in cluster.filtered_genes] rank = rankdata(p_vals, method='min') if len(self.new_cluster_profile): profiles = [[cluster.index] * num_of_genes] [ profiles.append([p] * num_of_genes) for p in self.new_cluster_profile[cluster.index] ] else: profiles = [[cluster.index] * num_of_genes] for row in zip(*profiles, gene_names, gene_ids, rank, scores, p_vals, fdr_vals): data.append(list(row)) out_data = Table(domain, data) out_data.attributes[TAX_ID] = self.tax_id out_data.attributes[GENE_AS_ATTRIBUTE_NAME] = False out_data.attributes[GENE_ID_COLUMN] = NCBI_ID self.Outputs.gene_scores.send(out_data) def gene_set_scores_output(self, selected_clusters): metas = [ StringVariable('Term'), StringVariable('Term ID'), StringVariable('Rank'), ContinuousVariable('P-value'), ContinuousVariable('FDR') ] if len(self.new_cluster_profile): # note: order is important metas = self.cluster_indicators + metas domain = Domain([], metas=metas, class_vars=self.cluster_var) data = [] for cluster in selected_clusters: num_of_sets = len(cluster.filtered_gene_sets) p_vals = [gs.p_val for gs in cluster.filtered_gene_sets] fdr_vals = [gs.fdr for gs in 
cluster.filtered_gene_sets] gs_names = [gs.name for gs in cluster.filtered_gene_sets] gs_ids = [gs.gs_id for gs in cluster.filtered_gene_sets] rank = rankdata(p_vals, method='min') if len(self.new_cluster_profile): profiles = [[cluster.index] * num_of_sets] [ profiles.append([p] * num_of_sets) for p in self.new_cluster_profile[cluster.index] ] else: profiles = [[cluster.index] * num_of_sets] for row in zip(*profiles, gs_names, gs_ids, rank, p_vals, fdr_vals): data.append(list(row)) self.Outputs.gene_set_scores.send(Table(domain, data)) def commit(self): selection_model = self.cluster_info_view.selectionModel() selected_rows = selection_model.selectedRows() selected_clusters = [] selected_cluster_indexes = set() selected_cluster_genes = set() if not self.input_data or not selected_rows: self.Outputs.selected_data.send(None) return for sel_row in selected_rows: cluster = sel_row.data() selected_clusters.append(cluster) selected_cluster_indexes.add(cluster.index) [ selected_cluster_genes.add(gene.ncbi_id) for gene in cluster.filtered_genes ] # get columns of selected clusters selected_columns = [ column for column in self.input_data.domain.attributes if self.gene_id_attribute in column.attributes and str(column.attributes[ self.gene_id_attribute]) in selected_cluster_genes ] domain = Domain(selected_columns, self.input_data.domain.class_vars, self.input_data.domain.metas) output_data = self.input_data.from_table(domain, self.input_data) # get rows of selected clusters selected_rows = [ row_index for row_index, col_index in enumerate(self.rows_by_cluster) if col_index in selected_cluster_indexes ] # send to output signal self.Outputs.selected_data.send(output_data[selected_rows]) self.gene_scores_output(selected_clusters) self.gene_set_scores_output(selected_clusters)
class OWGenialisExpressions(widget.OWWidget, ConcurrentWidgetMixin):
    """Browse collections on a Resolwe server and output their expressions.

    The main area lists the collections returned by the server (filtered
    and paginated); selecting a row fetches the expression data objects of
    that collection.  The control area offers sign in/out, the choice of
    expression type, process and input annotation, and normalization.
    """

    name = 'Genialis Expressions'
    priority = 30
    want_main_area = True
    want_control_area = True
    icon = '../widgets/icons/OWGenialisExpressions.svg'

    # Emitted as (next_page_available, previous_page_available).
    pagination_availability = pyqtSignal(bool, bool)

    norm_component = settings.SettingProvider(NormalizationComponent)
    pagination_component = settings.SettingProvider(PaginationComponent)
    filter_component = settings.SettingProvider(CollapsibleFilterComponent)

    # Indices of the selected radio buttons in the three option groups.
    exp_type: int
    exp_type = settings.Setting(None, schema_only=True)

    proc_type: int
    proc_type = settings.Setting(None, schema_only=True)

    input_annotation: int
    input_annotation = settings.Setting(None, schema_only=True)

    auto_commit: bool
    auto_commit = settings.Setting(False, schema_only=True)

    class Outputs:
        table = Output('Expressions', Table)

    class Warning(widget.OWWidget.Warning):
        no_expressions = Msg('Expression data objects not found.')
        no_data_objects = Msg('No expression data matches the selected filtering options.')
        unexpected_feature_type = Msg('Can not import expression data, unexpected feature type "{}".')
        multiple_feature_type = Msg('Can not import expression data, multiple feature types found.')

    def __init__(self):
        """Build the GUI and try a silent sign in."""
        super().__init__()
        ConcurrentWidgetMixin.__init__(self)

        self._res = None
        self._data_objects: Optional[List[Data]] = None
        self.data_output_options: Optional[DataOutputOptions] = None
        self.data_table: Optional[Table] = None

        # Control area
        box = gui.widgetBox(self.controlArea, 'Sign in')
        self.user_info = gui.label(box, self, '')
        self.server_info = gui.label(box, self, '')

        box = gui.widgetBox(box, orientation=Qt.Horizontal)
        self.sign_in_btn = gui.button(box, self, 'Sign In', callback=self.sign_in, autoDefault=False)
        self.sign_out_btn = gui.button(box, self, 'Sign Out', callback=self.sign_out, autoDefault=False)

        self.exp_type_box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.exp_type_options = gui.radioButtons(
            self.exp_type_box, self, 'exp_type', callback=self.on_data_output_option_changed
        )

        self.proc_type_box = gui.widgetBox(self.controlArea, 'Process Name')
        self.proc_type_options = gui.radioButtons(
            self.proc_type_box, self, 'proc_type', callback=self.on_data_output_option_changed
        )

        self.input_anno_box = gui.widgetBox(self.controlArea, 'Expression source')
        self.input_anno_options = gui.radioButtons(
            self.input_anno_box, self, 'input_annotation', callback=self.on_data_output_option_changed
        )

        self.norm_component = NormalizationComponent(self, self.controlArea)
        self.norm_component.options_changed.connect(self.on_normalization_changed)

        gui.rubber(self.controlArea)
        self.commit_button = gui.auto_commit(self.controlArea, self, 'auto_commit', '&Commit', box=False)
        self.commit_button.button.setAutoDefault(False)

        # Main area
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        # self.table_view.setStyleSheet('QTableView::item:selected{background-color: palette(highlight); color: palette(highlightedText);};')

        self.model = GenialisExpressionsModel(self)
        self.model.setHorizontalHeaderLabels(TableHeader.labels())
        self.table_view.setModel(self.model)
        self.table_view.selectionModel().selectionChanged.connect(self.on_selection_changed)

        self.filter_component = CollapsibleFilterComponent(self, self.mainArea)
        self.filter_component.options_changed.connect(self.on_filter_changed)
        self.mainArea.layout().addWidget(self.table_view)
        self.pagination_component = PaginationComponent(self, self.mainArea)
        self.pagination_component.options_changed.connect(self.update_collections_view)

        self.sign_in(silent=True)

    def __invalidate(self):
        """Drop the fetched data objects and table and clear related warnings."""
        self.data_objects = None
        self.data_table = None
        self.Warning.no_expressions.clear()
        self.Warning.multiple_feature_type.clear()
        self.Warning.unexpected_feature_type.clear()
        self.info.set_output_summary(StateInfo.NoOutput)

    def set_input_annotation_options(self) -> None:
        """Rebuild the 'Expression source' radio buttons from the options."""
        for btn in self.input_anno_options.buttons:
            btn.deleteLater()
        self.input_anno_options.buttons = []

        if not self.data_output_options:
            return

        for source, species, build in self.data_output_options.input_annotation:
            tooltip = f'{source}, {species}, {build}'
            text = f'{species}, {build}'
            gui.appendRadioButton(self.input_anno_options, text, tooltip=tooltip)

        if len(self.input_anno_options.buttons):
            self.input_annotation = 0

    def set_proc_type_options(self) -> None:
        """Rebuild the 'Process Name' radio buttons from the options."""
        for btn in self.proc_type_options.buttons:
            btn.deleteLater()
        self.proc_type_options.buttons = []

        if not self.data_output_options:
            return

        for proc_type, proc_name in self.data_output_options.process:
            gui.appendRadioButton(self.proc_type_options, proc_name, tooltip=proc_type)

        if len(self.proc_type_options.buttons):
            self.proc_type = 0

    def set_exp_type_options(self) -> None:
        """Rebuild the 'Expression Type' radio buttons from the options.

        When more than one button exists the second one (index 1) is
        selected; index 0 is the 'Read Counts' entry that
        `_available_data_output_options` always puts first.
        """
        for btn in self.exp_type_options.buttons:
            btn.deleteLater()
        self.exp_type_options.buttons = []

        if not self.data_output_options:
            return

        for _, exp_name in self.data_output_options.expression:
            gui.appendRadioButton(self.exp_type_options, exp_name)

        if len(self.exp_type_options.buttons) > 1:
            self.exp_type = 1

    @property
    def res(self):
        """The active server connection (``None`` until one is set)."""
        return self._res

    @res.setter
    def res(self, value: ResolweAPI):
        # Values that are not a ResolweAPI instance are silently ignored.
        if isinstance(value, ResolweAPI):
            self._res = value
            self.update_user_status()
            self.update_collections_view()
            self.__invalidate()
            self.Outputs.table.send(None)

    @property
    def data_objects(self):
        """Expression data objects of the selected collection."""
        return self._data_objects

    @data_objects.setter
    def data_objects(self, data_objects: Optional[List[Data]]):
        # The available output options always follow the data objects.
        self._data_objects = data_objects
        self.data_output_options = self._available_data_output_options()

    def _available_data_output_options(self) -> Optional[DataOutputOptions]:
        """
        Traverse the data objects in the selected collection and store the
        information regarding available expression types, process types and
        input annotations used in the creation of the data object.

        The method returns a named tuple (`DataOutputOptions`) which is used
        for creating radio buttons in the control area, or ``None`` when
        there are no data objects.
        """
        if not self.data_objects:
            return

        expression_types = sorted({data.output['exp_type'] for data in self.data_objects})
        expression_types = (Expression('rc', 'Read Counts'),) + tuple(
            Expression(exp_type, exp_type) for exp_type in expression_types
        )

        process_types = sorted({(data.process.type, data.process.name) for data in self.data_objects})
        process_types = tuple(Process(proc_type, proc_name) for proc_type, proc_name in process_types)

        input_annotations = sorted(
            {(data.output['source'], data.output['species'], data.output['build']) for data in self.data_objects}
        )
        input_annotations = tuple(
            InputAnnotation(source, species, build) for source, species, build in input_annotations
        )

        return DataOutputOptions(
            expression=expression_types, process=process_types, input_annotation=input_annotations
        )

    def update_user_status(self):
        """Show the signed-in user and server, and toggle the sign buttons."""
        user = self.res.get_currently_logged_user()

        if user:
            user_info = f"{user[0].get('first_name', '')} {user[0].get('last_name', '')}".strip()
            user_info = f"User: {user_info if user_info else user[0].get('username', '')}"
            self.sign_in_btn.setEnabled(False)
            self.sign_out_btn.setEnabled(True)
        else:
            user_info = 'User: Anonymous'
            self.sign_in_btn.setEnabled(True)
            self.sign_out_btn.setEnabled(False)

        self.user_info.setText(user_info)
        # Drop the URL scheme whatever its length; a fixed slice of eight
        # characters is only right for 'https://'.
        server = self.res.url.split('://', 1)[-1]
        self.server_info.setText(f'Server: {server}')

    def sign_in(self, silent=False):
        """Connect to a server.

        :param silent: when true, let the sign-in form try to sign in
            without showing it and fall back to ``DEFAULT_URL``; otherwise
            show the form as a dialog.
        """
        dialog = SignInForm(self)

        if silent:
            dialog.sign_in()
            if dialog.resolwe_instance is not None:
                self.res = dialog.resolwe_instance
            else:
                self.res = connect(url=DEFAULT_URL)

        if not silent and dialog.exec_():
            self.res = dialog.resolwe_instance

    def sign_out(self):
        """Reconnect without user credentials and delete the stored ones."""
        # Use public credentials when user signs out
        self.res = connect(url=DEFAULT_URL)
        # Remove username and password
        cm = CredentialManager(CREDENTIAL_MANAGER_SERVICE)
        del cm.username
        del cm.password

    def on_filter_changed(self):
        """Restart pagination and reload the collections."""
        self.pagination_component.reset_pagination()
        self.update_collections_view()

    def get_query_parameters(self) -> Dict[str, str]:
        """Translate the pagination and filter settings to query parameters."""
        params = {
            'limit': ItemsPerPage.values()[self.pagination_component.items_per_page],
            'offset': self.pagination_component.offset,
            'ordering': SortBy.values()[self.filter_component.sort_by],
        }

        if self.filter_component.filter_by_full_text:
            params.update({'text': self.filter_component.filter_by_full_text})

        if self.filter_component.filter_by_name:
            params.update({'name__icontains': self.filter_component.filter_by_name})

        if self.filter_component.filter_by_contrib:
            params.update({'contributor_name': self.filter_component.filter_by_contrib})

        if self.filter_component.filter_by_owner:
            params.update({'owners_name': self.filter_component.filter_by_owner})

        last_modified = FilterByDateModified.values()[self.filter_component.filter_by_modified]
        if last_modified:
            params.update({'modified__gte': last_modified.isoformat()})

        return params

    def get_collections(self) -> Tuple[Dict[str, str], Dict[str, str]]:
        """Return the server response with collections and their species."""
        # Get response from the server
        collections = self.res.get_collections(**self.get_query_parameters())
        # Loop through collections and store ids
        collection_ids = [collection['id'] for collection in collections.get('results', [])]
        # Get species by collection ids
        collection_to_species = self.res.get_species(collection_ids)

        return collections, collection_to_species

    def update_collections_view(self):
        """Reload the collections table and announce available pages."""
        collections, collection_to_species = self.get_collections()

        # Pass the results to data model
        self.model.set_data(collections.get('results', []), collection_to_species)
        self.table_view.setItemDelegateForColumn(TableHeader.id, gui.LinkStyledItemDelegate(self.table_view))
        self.table_view.setColumnHidden(TableHeader.slug, True)
        self.table_view.setColumnHidden(TableHeader.tags, True)

        # Check pagination parameters and emit pagination_availability signal
        next_page = bool(collections.get('next'))
        previous_page = bool(collections.get('previous'))
        self.pagination_availability.emit(next_page, previous_page)

    def normalize(self, table: Table) -> Optional[Table]:
        """Apply the enabled normalization steps to `table`.

        Returns ``None`` when `table` is falsy.
        """
        if not table:
            return

        if self.norm_component.quantile_norm:
            table = QuantileNormalization()(table)

        if self.norm_component.log_norm:
            table = LogarithmicScale()(table)

        if self.norm_component.z_score_norm:
            table = ZScore(axis=self.norm_component.z_score_axis)(table)

        if self.norm_component.quantile_transform:
            axis = self.norm_component.quantile_transform_axis
            # Never ask for more quantiles than there are values along
            # the other axis (capped at 100).
            quantiles = min(table.X.shape[int(not axis)], 100)
            distribution = QuantileTransformDist.values()[self.norm_component.quantile_transform_dist]
            table = QuantileTransform(axis=axis, n_quantiles=quantiles, output_distribution=distribution)(table)

        return table

    def commit(self):
        """Fetch the expression table if needed, otherwise resend it."""
        self.Warning.no_data_objects.clear()
        self.cancel()

        if self.data_objects and not self.data_table:
            self.start(
                runner,
                self.res,
                self.data_objects,
                self.data_output_options,
                self.exp_type,
                self.proc_type,
                self.input_annotation,
            )
        else:
            self.Outputs.table.send(self.normalize(self.data_table))

    def on_data_output_option_changed(self):
        """Discard the cached table so that commit fetches it again."""
        self.data_table = None

        if self.data_objects:
            self.commit()

    def on_normalization_changed(self):
        """Re-send the output with the new normalization settings."""
        if self.data_objects:
            self.commit()

    def on_selection_changed(self):
        """Load the data objects of the newly selected collection."""
        self.__invalidate()

        collection_id: str = self.get_selected_row_data(TableHeader.id)
        if not collection_id:
            return

        self.data_objects = self.res.get_expression_data_objects(collection_id)
        self.set_exp_type_options()
        self.set_proc_type_options()
        self.set_input_annotation_options()

        if not self.data_objects:
            self.Warning.no_expressions()
            return

        # Note: This here is to handle an edge case where we get
        #       different 'feature_type' data object in a collection.
        #       For now we raise a warning, but in the future we should
        #       discuss about how to properly handle different types of features.
        feature_types = {data.output['feature_type'] for data in self.data_objects}

        if len(feature_types) == 1 and 'gene' not in feature_types:
            self.Warning.unexpected_feature_type(feature_types.pop())
            self.data_objects = []
            return

        if len(feature_types) > 1:
            self.Warning.multiple_feature_type()
            self.data_objects = []
            return

        self.commit()

    def get_selected_row_data(self, column: int) -> Optional[str]:
        """Return the data of `column` in the first selected row, if any."""
        selection_model = self.table_view.selectionModel()
        rows = selection_model.selectedRows(column=column)
        if not rows:
            return

        return rows[0].data()

    def on_done(self, table: Table):
        """Store the fetched table and send its normalized form."""
        if table:
            samples, genes = table.X.shape
            self.info.set_output_summary(f'Samples: {samples} Genes: {genes}')

        self.data_table = table
        self.Outputs.table.send(self.normalize(table))

    def on_exception(self, ex):
        """Warn about missing data objects; re-raise any other exception."""
        if isinstance(ex, ResolweDataObjectsNotFound):
            self.Warning.no_data_objects()
            self.Outputs.table.send(None)
            self.data_table = None
            self.info.set_output_summary(StateInfo.NoOutput)
        else:
            raise ex

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self):
        return QSize(1280, 620)
def set_dataset(self, data, tid=None):
    """Add, refresh or drop the table shown for the input slot `tid`.

    With a table on the input, the tab for `tid` is created (or its view
    is reset and reused), filled with `data` and made current.  With
    ``None`` on the input, the tab that belongs to `tid` is removed.  In
    both cases the stored selection is cleared, the context is re-opened
    and the output is committed.
    """
    self.closeContext()
    has_slot = tid in self._inputs

    if data is None:
        if has_slot:
            # The input was disconnected: dispose of its tab and view.
            removed = self._inputs.pop(tid)
            view = removed.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            shown = self.tabs.currentWidget()
            if shown is not None:
                self.set_info(shown._input_slot.summary)
    else:
        if has_slot:
            # Reuse the view of the existing slot after resetting the
            # model and the header's sort indicator.
            view = self._inputs[tid].view
            view.setModel(None)
            view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
        else:
            view = QTableView()
            view.setSortingEnabled(True)
            view.setHorizontalScrollMode(QTableView.ScrollPerPixel)
            if self.select_rows:
                view.setSelectionBehavior(QTableView.SelectRows)

            header = view.horizontalHeader()
            header.setSectionsMovable(True)
            header.setSectionsClickable(True)
            header.setSortIndicatorShown(True)
            header.setSortIndicator(-1, Qt.AscendingOrder)

            # QHeaderView does not 'reset' the model sort column,
            # because there is no guarantee (requirement) that the
            # models understand the -1 sort column.
            def sort_reset(index, order):
                model = view.model()
                if model is not None and index == -1:
                    model.sort(index, order)

            header.sortIndicatorChanged.connect(sort_reset)

        view.dataset = data
        self.tabs.addTab(view, getattr(data, "name", "Data"))
        self._setup_table_view(view, data)

        slot = TableSlot(tid, data, table_summary(data), view)
        view._input_slot = slot
        self._inputs[tid] = slot

        self.tabs.setCurrentIndex(self.tabs.indexOf(view))
        self.set_info(slot.summary)

        pending_len = slot.summary.len
        if isinstance(pending_len, concurrent.futures.Future):
            # The length is still being computed; refresh the info box
            # (through a queued call to `_update_info`) once it is done.
            def update(f):
                QMetaObject.invokeMethod(
                    self, "_update_info", Qt.QueuedConnection)

            pending_len.add_done_callback(update)

    self.tabs.tabBar().setVisible(self.tabs.count() > 1)

    self.selected_rows = []
    self.selected_cols = []
    self.openContext(data)
    self.set_selection()
    self.commit()
class OWConcordance(OWWidget):
    """Show every occurrence of a query word together with its context.

    The word comes either from the line edit or from the 'Query Word'
    input.  Selecting rows in the concordance table selects the documents
    they come from; those documents and the concordance table itself are
    sent to the outputs.
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        return QSize(600, 400)

    def set_width(self):
        """Apply `context_width` to the model and re-apply the selection."""
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(
            {cell.row() for cell in selection.indexes()})
        self.commit()

    def set_selection(self, selection):
        """Select the given rows in the concordance view.

        :param selection: iterable of row numbers; nothing is done when
            it is empty
        """
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Set the corpus in which the query word is searched."""
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Take the query word from the 'Query Word' input.

        While a word is on the input the line edit is disabled.  Only the
        first word of `topic` is used; a warning is shown if there are
        more.
        """
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Search for the current word; this clears the row selection."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give the left and right context columns equal widths."""
        # setColumnWidth takes an int; true division yields a float, which
        # Python 3.10+ no longer converts implicitly for C-level integer
        # arguments, so truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Refit the table and refresh the numbers shown in the info box."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected documents and the concordance table."""
        # `word_index[row][0]` is used as a document index into the corpus.
        selected_docs = sorted(
            {self.model.word_index[row][0] for row in self.selected_rows})
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
class ExtendedTableView(QWidget):
    """Two table views side by side in a horizontal splitter.

    The left view lists genes, the right view shows gene information.
    The owner widget is taken from the ``parent`` keyword argument and is
    consulted for the currently selected filter.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ow = kwargs.get('parent', None)

        # Layout without margins so the splitter fills the whole widget.
        box = QVBoxLayout()
        box.setContentsMargins(0, 0, 0, 0)
        self.setLayout(box)

        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Horizontal)

        # Models are created later, on demand.
        self.genes_model = None
        self.info_model = None

        # Left pane: the genes.
        self.genes_view = QTableView()
        genes_header = self.genes_view.horizontalHeader()
        genes_header.hide()
        self.genes_view.setItemDelegate(GeneItemDelegate())
        genes_header.setSectionResizeMode(QHeaderView.Stretch)

        # Right pane: information about the genes.
        self.info_view = QTableView()
        self.info_view.setItemDelegate(HTMLDelegate())
        info_header = self.info_view.horizontalHeader()
        info_header.hide()
        info_header.setSectionResizeMode(QHeaderView.Stretch)

        for pane in (self.genes_view, self.info_view):
            self.splitter.addWidget(pane)

        # self.splitter.setStretchFactor(0, 60)
        # self.splitter.setStretchFactor(1, 40)

        box.addWidget(self.splitter)

    def set_genes_model(self, rows):
        """Create a fresh genes model and fill it with `rows`."""
        model = GeneMatcherModel()
        self.genes_model = model
        model.add_rows(rows)

    def get_selected_gens(self):
        """Return the selected rows of the genes view (QModelIndex list)."""
        selection_model = self.genes_selection_model()
        return selection_model.selectedRows()

    def reset_genes_model(self):
        """Schedule the genes model for deletion and forget it."""
        if not self.genes_model:
            return
        self.genes_model.deleteLater()
        self.genes_model = None

    def genes_selection_model(self):
        """Return the selection model of the genes view."""
        return self.genes_view.selectionModel()

    def reset_info_model(self):
        """Schedule the info model for deletion and detach it from its view."""
        if not self.info_model:
            return
        self.info_model.deleteLater()
        self.info_model = None
        self.info_view.setModel(None)

    def set_info_model(self, rows):
        """Show `rows` in the info view when the 'unique' filter is active.

        For any other filter the info model is reset instead.
        """
        # The owner's filter labels are expected to be exactly three; the
        # first position stands for the 'unique' filter.
        unique_filter, _partial, _unknown = range(len(self.ow.filter_labels))

        if self.ow.selected_filter != unique_filter:
            self.reset_info_model()
            return

        info_model = GeneMatcherModel(show_icon=False)
        self.info_model = info_model
        info_model.add_rows(rows)
        self.info_view.setModel(info_model)
        # Gene info cards are read-only; do not let the user select them.
        self.info_view.setSelectionMode(QAbstractItemView.NoSelection)
        # Row heights follow the delegate's size hints.
        self.info_view.resizeRowsToContents()
class OWExplainPred(OWWidget):
    """Explain the prediction of a model for a single sample.

    The contributions are computed in a worker thread by
    `ExplainPredictions.anytime_explain`; intermediate tables are shown
    and sent to the output as they arrive.  The computation stops on its
    own according to the error and p-value criteria, or when the user
    presses the stop button.
    """

    name = "Explain Predictions"
    description = "Computes attribute contributions to the final prediction with an approximation algorithm for shapely value"
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, Data and Sample domains mismatch.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None           # the ExplainPredictions instance, if any

        self._task = None
        self._executor = ThreadExecutor()

        self.dataview = QTableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                   sortingEnabled=True,
                                   selectionMode=QTableView.NoSelection,
                                   focusPolicy=Qt.StrongFocus)

        self.dataview.sortByColumn(2, Qt.DescendingOrder)
        # NOTE(review): `setResizeMode` is the Qt4-era name (Qt5 calls it
        # `setSectionResizeMode`); confirm the Qt binding in use provides it.
        self.dataview.horizontalHeader().setResizeMode(QHeaderView.Stretch)

        # Empty table shown while there are no explanations to display.
        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"),
                               StringVariable(name="Value")])
        self.placeholder_table_model = TableModel(
            Table.from_domain(domain), parent=None)

        self.dataview.setModel(self.placeholder_table_model)

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box, self, "gui_error",
                                   0.01, 1, step=0.01,
                                   label="Error < ", spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80, keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box, self, "gui_p_val",
                                   0.01, 1, step=0.01,
                                   label="Error p-value < ", spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea, self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.layout().addWidget(self.dataview)

        self.resize(640, 480)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data'"""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        self.e = None   # the explainer was built for the previous data
        if data is not None:
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model'"""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        self.e = None   # the explainer was built for the previous model
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample', checks if size is appropriate"""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
        if self.e is not None:
            self.e.saved = False

    def handleNewSignals(self):
        """Cancel a running computation, reset the view and start anew."""
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.dataview.setModel(self.placeholder_table_model)
        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        """Start a computation if data and sample are set, else just send."""
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()

    def commit_calc(self):
        """Transform the sample to the data domain and start explaining it.

        The explanation itself is only started when a model is present.
        """
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        # More unknowns after the transformation mean that some values
        # could not be carried over to the data's domain.
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()

        if self.model is not None:
            # calculate contributions
            if self.e is None:
                self.e = ExplainPredictions(self.data,
                                            self.model,
                                            batch_size=min(
                                                len(self.data.X), 500),
                                            p_val=self.gui_p_val,
                                            error=self.gui_error)
            self._task = task = Task()

            # The callbacks below are invoked by the explainer from the
            # worker thread; queued invocations hand the values over to
            # this widget's slots.
            def callback(progress):
                # update progress bar
                QMetaObject.invokeMethod(
                    self, "set_progress_value", Qt.QueuedConnection,
                    Q_ARG(int, progress))
                # A true return value reports that the task was canceled.
                if task.canceled:
                    return True
                return False

            def callback_update(table):
                QMetaObject.invokeMethod(
                    self, "update_view", Qt.QueuedConnection,
                    Q_ARG(Orange.data.Table, table))

            def callback_prediction(class_value):
                QMetaObject.invokeMethod(
                    self, "update_model_prediction", Qt.QueuedConnection,
                    Q_ARG(float, class_value))

            self.was_canceled = False
            explain_func = partial(
                self.e.anytime_explain, self.to_explain[0],
                callback=callback, update_func=callback_update,
                update_prediction=callback_prediction)

            self.progressBarInit(processEvents=None)
            task.future = self._executor.submit(explain_func)
            task.watcher = FutureWatcher(task.future)
            task.watcher.done.connect(self._task_finished)
            self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        """Show `table` (sorted as the header indicates) and send it on."""
        self.explanations = table
        model = TableModel(table, parent=None)
        header = self.dataview.horizontalHeader()
        model.sort(
            header.sortIndicatorSection(),
            header.sortIndicatorOrder())
        self.dataview.setModel(model)
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters:
        ----------
        f: concurrent.futures.Future
            future instance holding the result of learner evaluation
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occured during evaluation: {!r}".format(ex))

            # This class never creates a `results` attribute, so reading
            # it unconditionally raised AttributeError inside this handler
            # and skipped the progress-bar cleanup below.  Only clear it
            # when something (e.g. a subclass) has actually provided it.
            stale_results = getattr(self, "results", None)
            if stale_results is not None:
                for key in stale_results.keys():
                    stale_results[key] = None
        else:
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)

    def commit_output(self):
        """
        Sends best-so-far results forward
        """
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        """Stop the computation, or restart it if it was stopped."""
        if self.stop:
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            # The slot is disconnected, so finish the task by hand.
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """
        Parameters
        ----------
        class_value: float
            Number representing either index of predicted class value,
            looked up in domain, or predicted value (regression)
        """
        name = self.data.domain.class_vars[0].name
        if isinstance(self.data.domain.class_vars[0], ContinuousVariable):
            self.predict_info.setText(name + ":      " + str(class_value))
        else:
            self.predict_info.setText(
                name + ":      " +
                self.data.domain.class_vars[0].values[int(class_value)])

    def _update_error_spin(self):
        """Apply the new error criterion and restart the computation."""
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        """Apply the new p-value criterion and restart the computation."""
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
class OWFeatureStatistics(widget.OWWidget):
    """Widget showing a table of per-feature statistics with histograms."""

    # Width:height ratio kept for the histogram cells, and the row-height
    # limits applied while keeping that ratio.
    HISTOGRAM_ASPECT_RATIO = (7, 3)
    MINIMUM_HISTOGRAM_HEIGHT = 50
    MAXIMUM_HISTOGRAM_HEIGHT = 80

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    filter_string = ContextSetting('')

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.model = None  # type: Optional[FeatureStatisticsTableModel]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box, master=self, value='color_var',
            model=self.color_var_model, label='Color:', orientation=Qt.Horizontal,
        )
        self.cb_color_var.currentIndexChanged.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea, self, 'auto_commit', 'Send Selected Rows',
            'Send Automatically'
        )

        # Main area
        self.view = QTableView(
            showGrid=False,
            cornerButtonEnabled=False,
            sortingEnabled=True,
            selectionBehavior=QTableView.SelectRows,
            selectionMode=QTableView.MultiSelection,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            verticalScrollMode=QTableView.ScrollPerPixel,
        )

        hheader = self.view.horizontalHeader()
        hheader.setStretchLastSection(False)
        # Contents precision specifies how many rows should be taken into
        # account when computing the sizes, 0 being the visible rows. This is
        # crucial, since otherwise the `ResizeToContents` section resize mode
        # would call `sizeHint` on every single row in the data before first
        # render. However, this cannot be used here, since this only
        # appears to work properly when the widget is actually shown. When the
        # widget is not shown, size `sizeHint` is called on every row.
        hheader.setResizeContentsPrecision(5)
        # Set a nice default size so that headers have some space around titles
        hheader.setDefaultSectionSize(120)
        # Set individual column behaviour in `set_data` since the logical
        # indices must be valid in the model, which requires data.
        hheader.setSectionResizeMode(QHeaderView.Interactive)

        vheader = self.view.verticalHeader()
        vheader.setVisible(False)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        def bind_histogram_aspect_ratio(logical_index, _, new_size):
            """Force the horizontal and vertical header to maintain the defined
            aspect ratio specified for the histogram."""
            # Without a model there is no distribution column to track.
            if self.model is None:
                return
            # Prevent function being executed more than once per resize
            # (compare by value; the indices are plain integers).
            if logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            ratio_width, ratio_height = self.HISTOGRAM_ASPECT_RATIO
            unit_width = new_size / ratio_width
            new_height = unit_width * ratio_height
            effective_height = max(new_height, self.MINIMUM_HISTOGRAM_HEIGHT)
            effective_height = min(effective_height, self.MAXIMUM_HISTOGRAM_HEIGHT)
            # The header expects an integer pixel size.
            vheader.setDefaultSectionSize(int(effective_height))

        def keep_row_centered(logical_index, old_size, new_size):
            """When resizing the widget when scrolled further down, the
            positions of rows changes. Obviously, the user resized in order to
            better see the row of interest. This keeps that row centered."""
            # TODO: This does not work properly
            if self.model is None:
                return
            # Prevent function being executed more than once per resize
            if logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            top_row = self.view.indexAt(self.view.rect().topLeft()).row()
            bottom_row = self.view.indexAt(self.view.rect().bottomLeft()).row()
            middle_row = top_row + (bottom_row - top_row) // 2
            self.view.scrollTo(self.model.index(middle_row, 0), QTableView.PositionAtCenter)

        hheader.sectionResized.connect(bind_histogram_aspect_ratio)
        hheader.sectionResized.connect(keep_row_centered)

        self.distribution_delegate = DistributionDelegate()
        self.view.setItemDelegate(self.distribution_delegate)

        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(900, 500)

    def _filter_table_variables(self):
        """Build the variable-name filter regex from ``filter_string``.

        The regex is currently only constructed; applying it to the model
        is not implemented yet (see the TODO in ``__init__``).
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Set the input data, rebuild the model and refresh the view."""
        self.closeContext()
        self.data = data

        if data is not None:
            self.model = FeatureStatisticsTableModel(data, parent=self)
            self.color_var_model.set_domain(data.domain)
            # Set the selected index to 1 if any target classes, otherwise 0
            if data.domain.class_vars:
                self.color_var = data.domain.class_vars[0]
            self.openContext(self.data)
        else:
            self.model = None
            self.color_var_model.set_domain(None)

        self.view.setModel(self.model)
        self._filter_table_variables()

        self.distribution_delegate.clear()
        self.set_info()

        # The resize modes for individual columns must be set here, because
        # the logical index must be valid in `setSectionResizeMode`. It is not
        # valid when there is no data in the model.
        if self.model:
            columns, hheader = self.model.Columns, self.view.horizontalHeader()
            hheader.setSectionResizeMode(columns.ICON.index, QHeaderView.ResizeToContents)
            hheader.setSectionResizeMode(columns.DISTRIBUTION.index, QHeaderView.Stretch)

    @pyqtSlot(int)
    def __color_var_changed(self, new_index):
        """Pass the chosen color variable to the delegate and repaint histograms."""
        # Index 0 is the 'None' placeholder of the combo box model.
        attribute = None if new_index < 1 else self.cb_color_var.model()[new_index]
        self.distribution_delegate.set_color_attribute(attribute)

        if self.model:
            for row_idx in range(self.model.rowCount()):
                index = self.model.index(
                    row_idx,
                    self.model.Columns.DISTRIBUTION.index)
                self.view.update(index)

    @staticmethod
    def _format_variables_string(variables):
        """Return a summary such as '2 categorical and 1 numeric variables'.

        Returns 'No variables' when none of the variables is of a listed type.
        """
        agg = []
        for var_type_name, var_type in [
                ('categorical', DiscreteVariable),
                ('numeric', ContinuousVariable),
                ('time', TimeVariable),
                ('string', StringVariable)
        ]:
            # Match the exact type: `TimeVariable` derives from
            # `ContinuousVariable`, so `isinstance` would count every time
            # variable both as 'numeric' and as 'time'.
            var_type_list = [v for v in variables if type(v) is var_type]
            if var_type_list:
                agg.append((
                    '%d %s' % (len(var_type_list), var_type_name),
                    len(var_type_list)
                ))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        """Fill the 'Info' labels from the current data, or clear them."""
        if self.data is not None:
            self.info_summary.setText('<b>%s</b> contains %s with %s' % (
                self.data.name,
                plural('{number} instance{s}', self.model.n_instances),
                plural('{number} feature{s}', self.model.n_attributes)
            ))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes)
            )
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars)
            )
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas)
            )
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def commit(self):
        """Placeholder; nothing is sent yet."""
        pass

    def send_report(self):
        """Placeholder; reporting is not implemented yet."""
        pass
class OWCorpusViewer(OWWidget):
    """Widget listing corpus documents and showing the selected ones as HTML."""

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])   # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.regenerate_docs,)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        # For PyQt5 WebEngine's setHtml grabs the focus and makes typing hard
        # More info: http://stackoverflow.com/questions/36609489
        # To bypass the annoying behaviour disable the widget for WebEngine
        self.doc_webview = gui.WebviewWidget(self.splitter, self,
                                             debug=True, enabled=HAVE_WEBKIT)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        """Set the input data; a non-Corpus table is converted to a Corpus."""
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, warnings and the view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        """Fill the search/display feature lists and select all of them."""
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Incomplete/invalid pattern: keep the current list untouched.
            return

        def is_match(x):
            # An empty filter matches every document.
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)          # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        # Page template; `{}` receives the generated table, doubled braces
        # are literal CSS braces for `str.format`.
        HTML = (
            " <!doctype html> <html> <head> <meta charset='utf-8'> <style> "
            "table {{ border-collapse: collapse; }} "
            "mark {{ background: #FFCD28; }} "
            "tr > td {{ padding-bottom: 3px; padding-top: 3px; }} "
            "body {{ font-family: Helvetica; font-size: 10pt; }} "
            ".line {{ border-bottom: 1px solid #000; }} "
            ".separator {{ height: 5px; }} "
            ".variables {{ vertical-align: top; padding-right: 10px; }} "
            ".token {{ padding: 3px; border: 1px #B0B0B0 solid; "
            "margin-right: 5px; margin-bottom: 5px; display: inline-block; }} "
            "</style> </head> <body> {} </body> </html> "
        )
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = index.data(Qt.UserRole)[feature.name]
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                    feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        self.doc_webview.setHtml(HTML.format(html))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        """Evaluate the bundled JavaScript helpers in the document view."""
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in ('jquery-3.1.1.min.js', 'jquery.mark.min.js', 'highlighter.js', ):
            # Close each script file once it has been read.
            with open(os.path.join(resources, script), encoding='utf-8') as f:
                source = f.read()
            self.doc_webview.evalJS(source)

    def regenerate_docs(self):
        """Rebuild the searchable documents from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        """Re-run the filter and refresh the info labels when a corpus is set."""
        if self.corpus is not None:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        """Highlight the filter text in the shown documents via ``mark()``."""
        import json

        search_keyword = self.filter_input.text().strip('|')
        if search_keyword:
            # json.dumps yields a quoted JavaScript string literal with
            # backslashes and quotes escaped, so the text cannot break out
            # of the literal.
            self.doc_webview.evalJS('mark({});'.format(json.dumps(search_keyword)))

    def update_info(self):
        """Update the attributes shown by the 'Info' labels."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send the matching and the non-matching documents (or None)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
class OWCorpusViewer(OWWidget):
    """Widget listing corpus documents and showing the selected ones as HTML."""

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])  # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.regenerate_docs, )

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False, )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        """Set the input data; a non-Corpus table is converted to a Corpus."""
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, warnings and the view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        """Fill the search/display feature lists and select all of them."""
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Incomplete/invalid pattern: keep the current list untouched.
            return

        def is_match(x):
            # An empty filter matches every document.
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        # Page template; `{}` receives the generated table, doubled braces
        # are literal CSS braces for `str.format`.
        HTML = (
            " <!doctype html> <html> <head> <meta charset='utf-8'> <style> "
            "table {{ border-collapse: collapse; }} "
            "mark {{ background: #FFCD28; }} "
            "tr > td {{ padding-bottom: 3px; padding-top: 3px; }} "
            "body {{ font-family: Helvetica; font-size: 10pt; }} "
            ".line {{ border-bottom: 1px solid #000; }} "
            ".separator {{ height: 5px; }} "
            ".variables {{ vertical-align: top; padding-right: 10px; }} "
            ".token {{ padding: 3px; border: 1px #B0B0B0 solid; "
            "margin-right: 5px; margin-bottom: 5px; display: inline-block; }} "
            "img {{ max-width: 100%; }} "
            "</style> </head> <body> {} </body> </html> "
        )
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        html = '<table>'
        for doc_count, index in enumerate(
                self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:  # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = str(index.data(Qt.UserRole)[feature.name])
                # Features tagged with type 'image' are shown as images,
                # except for the '?' value.
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                    feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'

        # QUrl is a workaround to allow local resources
        # https://bugreports.qt.io/browse/QTBUG-55902?focusedCommentId=335945
        self.doc_webview.setHtml(HTML.format(html), QUrl("file://"))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        """Evaluate the bundled JavaScript helpers in the document view."""
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in (
                'jquery-3.1.1.min.js',
                'jquery.mark.min.js',
                'highlighter.js', ):
            # Close each script file once it has been read.
            with open(os.path.join(resources, script), encoding='utf-8') as f:
                source = f.read()
            self.doc_webview.evalJS(source)

    def regenerate_docs(self):
        """Rebuild the searchable documents from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        """Re-run the filter and refresh the info labels when a corpus is set."""
        if self.corpus is not None:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        """Highlight the filter text in the shown documents via ``mark()``."""
        import json

        search_keyword = self.filter_input.text().strip('|')
        if search_keyword:
            # json.dumps yields a quoted JavaScript string literal with
            # backslashes and quotes escaped, so the text cannot break out
            # of the literal.
            self.doc_webview.evalJS('mark({});'.format(json.dumps(search_keyword)))

    def update_info(self):
        """Update the attributes shown by the 'Info' labels."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len,
                    self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send the matching and the non-matching documents (or None)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
class OWConcordance(OWWidget):
    """Widget showing a query word together with its surrounding context."""

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Apply ``context_width`` to the model and restore the selection."""
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(set(cell.row() for cell in selection.indexes()))
        self.commit()

    def set_selection(self, selection):
        """Select the given rows in the concordance view (no-op when empty)."""
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Set the input corpus and refresh the concordances."""
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # A word supplied on the input takes precedence over the stored one.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Take the query word from the 'Query Word' input.

        While a word is supplied on the input the query line edit is
        disabled; only the first word of the topic is used.
        """
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current query word: reset selection, update view, commit."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Re-apply the stored row selection after the inputs were processed."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give the left and right context columns equal widths."""
        # Integer division: the column width must be an integer number of
        # pixels.
        col_width = (self.conc_view.width() -
                     self.conc_view.columnWidth(1)) // 2 - 12
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the context columns balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view to its contents and refresh the info attributes."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents of the selected rows and the concordance table."""
        # The first element of a ``word_index`` entry is used as the
        # document index into the corpus.
        selected_docs = sorted(set(self.model.word_index[row][0]
                                   for row in self.selected_rows))
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        """Add the query summary and the concordance table to the report."""
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
class OWCorpusViewer(OWWidget):
    """Widget that lists corpus documents, filters them with a regular
    expression and shows the selected documents in a web view.

    Documents matching the filter are sent to ``matching_docs``, all the
    remaining ones to ``other_docs``.
    """

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, ' ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, ' ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new (or removed) corpus on the input.

        The widget is reset first; with a corpus present the feature lists
        are rebuilt, the context is opened and the views are refreshed.
        Outputs are committed in either case.
        """
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                # Pass the check-state enum explicitly instead of a bool.
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, warnings and the view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """List documents matching the filter into the left scrolling area.

        Also rebuilds ``output_mask`` with the corpus indices of the listed
        documents. An invalid regular expression leaves the list untouched.
        """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # re.error is the exception raised by re.compile; the
            # sre_constants module that used to expose it is deprecated.
            return

        def is_match(x):
            # An empty filter matches every document.
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        documents = zip(self.corpus, self.corpus.titles, self.corpus_docs)
        for i, (doc, title, content) in enumerate(documents):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        """Select the first listed document, or blank the view if none."""
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        """Select the rows stored in ``self.selection`` in the document list."""
        view = self.doc_list
        if self.selection:
            model = view.model()
            selection = QItemSelection()
            for row in self.selection:
                selection.append(
                    QItemSelectionRange(model.index(row, 0),
                                        model.index(row, 0)))
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """Show the selected documents in the right area."""
        HTML = '''
<!doctype html>
<html>
<head>
<script type="text/javascript" src="resources/jquery-3.1.1.min.js">
</script>
<script type="text/javascript" src="resources/jquery.mark.min.js">
</script>
<script type="text/javascript" src="resources/highlighter.js">
</script>
<meta charset='utf-8'>
<style>

table {{ border-collapse: collapse; }}
mark {{ background: #FFCD28; }}

tr > td {{
    padding-bottom: 3px;
    padding-top: 3px;
}}

body {{
    font-family: Helvetica;
    font-size: 10pt;
}}

.line {{ border-bottom: 1px solid #000; }}
.separator {{ height: 5px; }}

.variables {{
    vertical-align: top;
    padding-right: 10px;
}}

.content {{
    /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */

    /* These are technically the same, but use both */
    overflow-wrap: break-word;
    word-wrap: break-word;

    -ms-word-break: break-all;
    /* This is the dangerous one in WebKit, as it breaks things wherever */
    word-break: break-all;
    /* Instead use this non-standard one: */
    word-break: break-word;

    /* Adds a hyphen where the word breaks, if supported (No Blink) */
    -ms-hyphens: auto;
    -moz-hyphens: auto;
    -webkit-hyphens: auto;
    hyphens: auto;
}}

.token {{
    padding: 3px;
    border: 1px #B0B0B0 solid;
    margin-right: 5px;
    margin-bottom: 5px;
    display: inline-block;
}}

img {{
    max-width: 100%;
}}

</style>
</head>
<body>
{}
</body>
</html>
'''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if not self.display_indices:
            self.Warning.no_feats_display()

        tokens = None
        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        # Query the selection once and reuse it below.
        selected_indexes = self.doc_list.selectionModel().selectedRows()
        selection = [index.row() for index in selected_indexes]
        if selection:
            # Remember the last non-empty selection.
            self.selection = selection

        # Collect fragments and join once instead of repeated concatenation.
        parts = ['<table>']
        for doc_count, index in enumerate(selected_indexes):
            if doc_count > 0:  # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            doc = index.data(Qt.UserRole)
            row_ind = doc.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(doc[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td class="content">{}</td></tr>'.format(
                        feature.name, value))

            if self.show_tokens:
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(
                        ''.join('<span class="token">{}</span>'.format(token)
                                for token in tokens[row_ind])))

        parts.append('</table>')
        html = ''.join(parts)

        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the filter in ``text`` in a ``<mark>`` tag.

        The text is returned unchanged when the filter is empty, is not a
        valid regular expression, or does not match.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            return text

        return reg.sub(
            lambda m: '<mark data-markjs="true">{}</mark>'.format(m.group(0)),
            text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-run the filter."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate the per-document strings from the search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if not feats:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-filter the document list, reset the selection and commit."""
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Refresh the attributes shown in the Info box."""
        if self.corpus is not None:
            has_tokens = self.corpus.has_tokens()
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if has_tokens else 'n/a'
            self.n_types = len(self.corpus.dictionary) if has_tokens else 'n/a'
            self.is_preprocessed = has_tokens
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send the matching and the remaining documents (or None for both)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus))
                              if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        """Report the filter expression and the number of matching documents."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    """Widget for browsing GEO data sets and outputting the selected one.

    The main area shows a filterable table of data sets, a description of
    the selected one and a checkable tree of its sample annotations. The
    selected data set is fetched through ``run_download_task`` and sent to
    the ``gds_data`` output.
    """

    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    selected_gds = Setting(None)
    gdsSelectionStates = Setting({})
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo()  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box, self, 'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea, self, 'auto_commit', '&Commit', box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea, self, 'search_pattern', 'Filter:',
                               callbackOnType=True, callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        # Guards on_annotation_selection_changed while the tree is rebuilt.
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        """Persist the current state of both splitters in the settings."""
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        """Show the description of the selected data set."""
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        """Rebuild the sample-annotation tree for the data set ``gds``.

        Check states are restored from ``gdsSelectionStates``; entries
        without a stored state default to checked.
        """
        self._annotations_updating = True
        try:
            self.annotations_widget.clear()

            annotations = defaultdict(set)
            subsets_count = {}

            for desc in gds['subsets']:
                annotations[desc['type']].add(desc['description'])
                subsets_count[desc['description']] = str(len(desc['sample_id']))

            for _type, subsets in annotations.items():
                key = (gds["name"], _type)
                parent = QTreeWidgetItem(self.annotations_widget, [_type])
                parent.key = key
                for subset in subsets:
                    key = (gds['name'], _type, subset)
                    item = QTreeWidgetItem(
                        parent, [subset, subsets_count.get(subset, '')])
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked))
                    item.key = key
        finally:
            # Always release the guard, otherwise later check-state changes
            # would be ignored after a failure above.
            self._annotations_updating = False

        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        """Re-select the stored data set in the table without emitting signals."""
        if self.selected_gds is not None:
            index = self.table_model.get_row_index(
                self.selected_gds.get('name'))
            if index is not None:
                selection_model = self.table_view.selectionModel()
                selection_model.blockSignals(True)
                try:
                    self.table_view.selectRow(index)
                    self._handle_selection_changed()
                finally:
                    # Never leave the selection model with signals blocked.
                    selection_model.blockSignals(False)

    def _handle_selection_changed(self):
        """Update the description, annotations and info for the selected row."""
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data() if len(selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        """Filter the table by ``search_pattern`` and restore the selection."""
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        """Start the download task for the selected data set, if any."""
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task,
                       self.selected_gds.get('name'),
                       self.get_selected_samples(),
                       self.genes_as_rows)

    def on_gds_selection_changed(self):
        """React to a new table selection: refresh the panels and download."""
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        """Store the check states of all annotation items and re-run."""
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if hasattr(item, 'key'):
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if hasattr(child, 'key'):
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        """Show the data set counts (all, cached, filtered) in the info box."""
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """Return the currently selected sample annotations.

        The return value is a ``defaultdict(list)`` mapping each sample type
        (the text of a top-level tree item) to the list of its selected
        sample values (the texts of its children).

        .. note:: if some Sample annotation type has no selected values,
                  all of its values are returned, i.e. the type is not
                  filtered on.
        """

        def childiter(item):
            """Iterate over the children of a QTreeWidgetItem instance."""
            for i in range(item.childCount()):
                yield item.child(i)

        samples = defaultdict(list)
        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            type_name = str(stype.text(0))
            all_values = []
            selected_values = []
            for sval in childiter(stype):
                value = str(sval.text(0))
                all_values.append(value)
                # Items without a stored state count as selected.
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)

            # If no sample of sample type is selected we don't filter on it.
            values = selected_values or all_values
            if values:
                samples[type_name].extend(values)

        return samples

    def commit(self):
        """Send the downloaded data set (or None) to the output."""
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        """Store and output the downloaded data set, then refresh the table."""
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        """Shut down the concurrent machinery before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        """Report the selected data set, its size and its sample annotations.

        Nothing is reported when no data set is selected.
        """
        gds = self.selected_gds
        if gds is None:
            # selected_gds defaults to None; subscripting it would raise.
            return

        self.report_items(
            "GEO Dataset",
            [
                ("ID", gds['name']),
                ("Title", gds['title']),
                ("Organism", gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", gds['sample_count']),
                ("Features", gds['variables']),
                ("Genes", gds['genes']),
            ],
        )

        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))

        parts = ["<ul>"]
        for _type, entries in subsets.items():
            parts.append("<b>" + _type + ":</b></br>")
            for desc, count in entries:
                parts.append(9 * " " + "<b>{}:</b> {}</br>".format(desc, count))
        parts.append("</ul>")
        self.report_html += "".join(parts)
class OWGenes(OWWidget, ConcurrentWidgetMixin):
    """Widget that matches gene identifiers from the input table.

    Gene names are read either from the feature (column) names or from a
    data column, matched with ``GeneMatcher`` for the selected organism and
    shown in a table. The annotated input data and the table of matched
    genes are sent to the outputs.
    """

    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1280, 960)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        # Single-shot timer used to debounce filtering while typing.
        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box, self, 'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        # separator(output_box)
        # output_box.layout().addWidget(horizontal_line())
        # separator(output_box)
        self.exclude_radio = checkBox(output_box, self, 'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box, self, 'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea, self, 'search_pattern', 'Filter:',
                               callbackOnType=True,
                               callback=self.handle_filter_callback)
        # rubber(self.radio_group)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        self.unknown_model = UnknownGeneInfoModel()

        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        """Restart the 500 ms debounce timer that applies the filter."""
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        """Filter the gene table by ``search_pattern`` and commit."""
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        """Detach and clear the gene models and refresh the info box."""
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):
        """Show gene counts in the info box, or a no-data message."""
        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))
        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        """Populate the gene tables once the matcher task has finished."""
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        """Fill the organism list and combo box, sorted by organism name."""
        available_organism = sorted(
            ((tax_id, taxonomy.name(tax_id))
             for tax_id in taxonomy.common_taxids()),
            key=lambda x: x[1])

        self.organisms = [tax_id for tax_id, _ in available_organism]
        self.organism_select_combobox.addItems(
            [organism_name for _, organism_name in available_organism])

    def gene_names_from_table(self):
        """Extract gene names from the input table into ``input_genes``.

        Names come from the attribute names when ``use_attr_names`` is set,
        otherwise from the selected gene column (unknown values are
        skipped). ``input_genes`` stays empty when there is no input or no
        usable gene column.
        """
        self.input_genes = []
        if self.input_data:
            if self.use_attr_names:
                self.input_genes = [
                    str(attr.name).strip()
                    for attr in self.input_data.domain.attributes
                ]
            else:
                if self.selected_gene_col is None:
                    self.selected_gene_col = self.gene_column_identifier()

                # Without any candidate column there is nothing to read.
                if self.selected_gene_col is not None:
                    self.input_genes = [
                        str(e[self.selected_gene_col])
                        for e in self.input_data
                        if not np.isnan(e[self.selected_gene_col])
                    ]

    def _update_gene_matcher(self):
        """Create a new gene matcher for the current genes and organism."""
        self.gene_names_from_table()
        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes
        # self.gene_matcher.organism = self.get_selected_organism()

    def get_selected_organism(self):
        """Return the taxonomy id of the organism selected in the combo box."""
        return self.organisms[self.selected_organism]

    def _run(self):
        """Start the gene matching task if a matcher is available."""
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        """Reset the views and re-run gene matching with the current options."""
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """Get the most suitable column that stores genes.

        If there are several suitable columns, the one with the most unique
        values is returned. Returns None when the column model offers no
        string or discrete column.
        """
        # candidates -> (variable, num of unique values)
        candidates = [
            (col, np.unique(self.input_data.get_column_view(col)[0]).size)
            for col in self.gene_columns_model
            if isinstance(col, (DiscreteVariable, StringVariable))
        ]
        if not candidates:
            return None

        # Stable sort + last element: among ties the later column wins.
        best_candidate, _ = sorted(candidates, key=lambda x: x[1])[-1]
        return best_candidate

    def find_genes_location(self):
        """Try to locate the genes in the input data when it is first loaded.

        Proposed rules:
            - when no suitable feature names are present, check the columns.
            - find the most suitable column, that is, the one with most
              unique values.
        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new (or removed) data table on the input."""
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()
        else:
            # Input removed (or empty): do not keep stale data on the outputs.
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)

    def commit(self):
        """Send the annotated data table and the table of matched genes.

        Selected rows of the gene table define the genes of interest; with
        no selection all genes that pass the filter are used. None is sent
        on both outputs when there is no input data, no gene matcher, no
        model attached to the gene view yet, or no known gene.
        """
        selection_model = self.table_view.selectionModel()
        if (self.input_data is None or self.gene_matcher is None
                or selection_model is None):
            # The output check boxes can trigger a commit before any data
            # has been processed; there is nothing to compute then.
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)
            return

        selection = selection_model.selectedRows(
            self.table_model.entrez_column_index)
        selected_genes = [row.data() for row in selection]
        if not selected_genes:
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            if not self.use_attr_names:
                # Genes are in rows (we have a column with genes).
                table = self._table_with_genes_in_rows(
                    gene_ids, known_genes, selected_genes)
            else:
                # Genes are in columns (genes are features).
                table = self._table_with_genes_in_columns(
                    known_genes, selected_genes)

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def _table_with_genes_in_rows(self, gene_ids, known_genes, selected_genes):
        """Build the output table when genes are stored in a data column."""
        input_domain = self.input_data.domain
        if self.target_database in input_domain:
            gene_var = input_domain[self.target_database]
            metas = input_domain.metas
        else:
            gene_var = StringVariable(self.target_database)
            metas = input_domain.metas + (gene_var, )

        domain = Domain(input_domain.attributes, input_domain.class_vars, metas)

        table = self.input_data.transform(domain)
        col, _ = table.get_column_view(gene_var)
        col[:] = gene_ids

        # filter selected rows
        selected_genes_set = set(selected_genes)
        selected_rows = [
            row_index for row_index, row in enumerate(table)
            if str(row[gene_var]) in selected_genes_set
        ]

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        table.attributes[GENE_ID_COLUMN] = self.target_database
        table = table[selected_rows] if selected_rows else table

        if self.exclude_unmatched:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(gene_var, known_genes)
            # apply filter to the data
            table = table_filter.Values([only_known])(table)

        return table

    def _table_with_genes_in_columns(self, known_genes, selected_genes):
        """Build the output table when genes are stored as feature names."""
        domain = self.input_data.domain.copy()
        table = self.input_data.transform(domain)

        for gene in self.gene_matcher.genes:
            if gene.input_identifier in table.domain:
                table.domain[gene.input_identifier].attributes[
                    self.target_database] = (str(gene.gene_id)
                                             if gene.gene_id else '?')

                if self.replace_id_with_symbol:
                    try:
                        table.domain[gene.input_identifier].name = str(
                            gene.symbol)
                    except AttributeError:
                        # TODO: missing gene symbol, need to handle this?
                        pass

        # filter selected columns
        selected_genes_set = set(selected_genes)
        selected = [
            column for column in table.domain.attributes
            if self.target_database in column.attributes
            and str(column.attributes[self.target_database]) in selected_genes_set
        ]

        output_attrs = table.domain.attributes

        if selected:
            output_attrs = selected

        if self.exclude_unmatched:
            known_genes_set = set(known_genes)
            # .get: a column without the annotation counts as unmatched
            # instead of raising a KeyError.
            output_attrs = [
                col for col in output_attrs
                if col.attributes.get(self.target_database) in known_genes_set
            ]

        domain = Domain(output_attrs, table.domain.class_vars,
                        table.domain.metas)

        table = table.from_table(domain, table)

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
        table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

        return table

    def toggle_radio_options(self):
        """Enable the output options that make sense for the current genes."""
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        if self.gene_matcher.genes:
            # enable checkbox if unknown genes are detected
            has_unknown = len(self.gene_matcher.genes) != len(
                self.gene_matcher.get_known_genes())
            self.exclude_radio.setEnabled(has_unknown)
            self.exclude_unmatched = has_unknown

    def get_target_ids(self):
        """Return the gene id of every input gene as a string ('?' if unknown)."""
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
class OWCorpusViewer(OWWidget):
    """Widget that lists the documents of a corpus and shows the selected ones.

    The document list (left) can be narrowed with a regular expression; the
    contents of the documents selected in that list are rendered as HTML in
    a web view (right).  On commit, the selected documents are sent to
    "Matching Docs", the remaining ones to "Other Docs", and the result of
    ``create_annotated_table`` for the whole corpus to "Corpus".
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)
        corpus = Output("Corpus", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    selected_documents = ContextSetting([])
    regexp_filter = ContextSetting("")

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the control area (info, feature lists) and the main area."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]
        self.matches = 0                # Matches of the query

        # Info attributes (update_info only touches the model when a corpus
        # is set, so it is safe to call before the model is created)
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching documents: %(n_matching)s')
        gui.label(info_box, self, 'Matches: %(n_matches)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:', callback=self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(
            self.selection_changed
        )

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text currently selected in the web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new input corpus (or ``None``) and refresh the widget."""
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                # setCheckState expects a Qt.CheckState, not a bool
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            # default selection: the first document, when there is one
            titles = corpus.titles
            self.selected_documents = \
                [titles[0]] if titles is not None and len(titles) else []
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, warnings and views."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """List documents into the left scrolling area.

        Only documents whose generated text matches the filter are listed
        (all documents when the filter is empty); ``self.matches`` is set to
        the total number of matches.  When the filter is not a valid regular
        expression the list is left unchanged.
        """
        if self.corpus_docs is None:
            return
        # TODO: remove search_keyword??
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            return

        matches = 0
        self.doc_list_model.clear()
        documents = zip(self.corpus, self.corpus.titles, self.corpus_docs)
        for doc, title, content in documents:
            n_found = sum(1 for _ in reg.finditer(content)) \
                if self.regexp_filter else 0
            if not self.regexp_filter or n_found:
                matches += n_found
                item = QStandardItem()
                item.setData(str(title), Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
        self.matches = matches

    def get_selected_documents_from_view(self) -> Set[str]:
        """
        Returns
        -------
        Set with names of selected documents in the QTableView
        """
        return {
            i.data(Qt.DisplayRole)
            for i in self.doc_list.selectionModel().selectedRows()
        }

    def set_selection(self) -> None:
        """
        Select documents in selected_documents attribute in the view
        """
        view = self.doc_list
        model = view.model()

        previously_selected = self.selected_documents.copy()
        selection = QItemSelection()
        for row in range(model.rowCount()):
            index = model.index(row, 0)
            if model.data(index, Qt.DisplayRole) in self.selected_documents:
                selection.append(QItemSelectionRange(index, index))
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )
        if len(selection) == 0:
            # in cases when selection is empty qt's selection_changed is not
            # called and so we need to manually trigger show_docs
            self.show_docs()
        # select emits the selection-changed signal, which calls
        # selection_changed; when filtering, this means that documents which
        # are currently filtered out get removed from self.selected_documents.
        # We still want to keep them so that they are selected again after
        # the user removes the filter.
        self.selected_documents = previously_selected

    def selection_changed(self) -> None:
        """
        Function is called every time the selection changes - when user select
        new range of documents
        """
        self.selected_documents = self.get_selected_documents_from_view()
        self.show_docs()
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */

            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;

            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;

            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        # The display list box edits display_list_indices; mirror it into
        # the context setting.
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        # NOTE(review): values are inserted into the markup unescaped.
        parts = ['<table>']
        selected_rows = self.doc_list.selectionModel().selectedRows()
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            document = index.data(Qt.UserRole)
            row_ind = document.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td class="content">{}</td></tr>'.format(
                        feature.name, value))

            if self.show_tokens:
                token_spans = ''.join(
                    '<span class="token">{}</span>'.format(token)
                    for token in tokens[row_ind])
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(token_spans))

        parts.append('</table>')
        # relative "resources/..." URLs in the template resolve against
        # this file's location
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(''.join(parts)), base)

    def __mark_text(self, text):
        """Wrap every match of the filter in ``text`` in a ``<mark>`` tag.

        ``text`` is returned unchanged when the filter is empty or is not a
        valid regular expression.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            return text

        return reg.sub(
            lambda m: '<mark data-markjs="true">{}</mark>'.format(m.group(0)),
            text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-apply the filter."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-list documents for the current filter and update outputs."""
        if self.corpus is not None:
            self.list_docs()
            self.set_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Refresh the attributes shown by the labels in the Info box."""
        if self.corpus is not None:
            has_tokens = self.corpus.has_tokens()
            self.n_matching = '{}/{}'.format(
                self.doc_list_model.rowCount(), len(self.corpus))
            self.n_matches = self.matches if self.matches else 'n/a'
            self.n_tokens = \
                sum(map(len, self.corpus.tokens)) if has_tokens else 'n/a'
            self.n_types = \
                len(self.corpus.dictionary) if has_tokens else 'n/a'
        else:
            self.n_matching = ''
            self.n_matches = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected / unselected documents and the annotated corpus.

        ``None`` is sent on an output whose document subset is empty, and on
        all outputs when there is no corpus.
        """
        matched = unmatched = annotated_corpus = None
        corpus = self.corpus
        if corpus is not None:
            # it returns a set of selected documents which are in view
            selected_docs = self.get_selected_documents_from_view()
            titles = corpus.titles
            matched_mask = [
                i for i, t in enumerate(titles) if t in selected_docs
            ]
            unmatched_mask = [
                i for i, t in enumerate(titles) if t not in selected_docs
            ]

            matched = corpus[matched_mask] if matched_mask else None
            unmatched = corpus[unmatched_mask] if unmatched_mask else None
            annotated_corpus = create_annotated_table(corpus, matched_mask)
        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(unmatched)
        self.Outputs.corpus.send(annotated_corpus)

    def send_report(self):
        """Report the query and the match counts."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
            ("Matches", self.n_matches)
        ))

    def showEvent(self, event):
        """Adjust the splitter whenever the widget is shown."""
        super().showEvent(event)
        self.update_splitter()

    def update_splitter(self):
        """
        Update splitter that document list on the left never take more
        than 1/3 of the space. It is only set on showEvent. If user
        later changes sizes it stays as it is.
        """
        w1, w2 = self.splitter.sizes()
        ws = w1 + w2
        if w2 < 2 / 3 * ws:
            # QSplitter.setSizes takes integers; float sizes are rejected by
            # recent Python/PyQt combinations.
            left = ws // 3
            self.splitter.setSizes([left, ws - left])
class OWCorpusViewer(OWWidget):
    """Widget that lists the documents of a corpus and shows the selected ones.

    The document list (left) contains the documents matching the regular
    expression filter; the documents selected in the list are rendered as
    HTML in a web view (right), where matches are highlighted by the
    ``mark`` JavaScript function once the page has loaded.  On commit, the
    listed (matching) documents are sent to "Matching Docs" and all other
    documents to "Other Docs".
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    search_indices = ContextSetting(
        [], exclude_metas=False)   # features included in search
    display_indices = ContextSetting(
        [], exclude_metas=False)   # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the control area (info, feature lists) and the main area."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes (update_info only touches the model when a corpus
        # is set, so it is safe to call before the model is created)
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, ' ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, ' ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
        self.doc_webview.loadFinished.connect(self.highlight_docs)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text currently selected in the web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new input corpus (or ``None``) and refresh the widget."""
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                # setCheckState expects a Qt.CheckState, not a bool
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, warnings and views."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """List documents into the left scrolling area.

        Documents whose generated text matches the filter (all documents
        when the filter is empty) are added to the list and their corpus
        indices are stored in ``output_mask``.  When the filter is not a
        valid regular expression nothing is changed.
        """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        documents = zip(self.corpus, self.corpus.titles, self.corpus_docs)
        for i, (doc, title, content) in enumerate(documents):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        """Select the first listed document, or clear the web view if none."""
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        """Select the rows stored in ``self.selection`` in the document list."""
        view = self.doc_list
        if len(self.selection):
            model = view.model()
            selection = QItemSelection()
            for row in self.selection:
                index = model.index(row, 0)
                selection.append(QItemSelectionRange(index, index))
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        # The display list box edits display_list_indices; mirror it into
        # the context setting.
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        selected_rows = self.doc_list.selectionModel().selectedRows()
        selection = [i.row() for i in selected_rows]
        if selection:
            # remember the last non-empty selection
            self.selection = selection

        # NOTE(review): values are inserted into the markup unescaped.
        parts = ['<table>']
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                # cells of searched features are tagged for highlighting
                mark = 'class="mark-area"' \
                    if feature in marked_search_features else ''
                value = str(index.data(Qt.UserRole)[feature.name])
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td {}>{}</td></tr>'.format(feature.name, mark, value))

            if self.show_tokens:
                token_spans = ''.join(
                    '<span class="token">{}</span>'.format(token)
                    for token in tokens[row_ind])
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(token_spans))

        parts.append('</table>')
        # relative "resources/..." URLs in the template resolve against
        # this file's location
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(''.join(parts)), base)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-apply the filter."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-list documents for the current filter and update outputs."""
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    @Slot()
    def highlight_docs(self):
        """Ask the loaded page to highlight matches of the current filter."""
        import json

        search_keyword = self.regexp_filter.strip('|')
        if search_keyword:
            # json.dumps yields a quoted JavaScript string literal: it
            # doubles backslashes (as mark.js needs) and also escapes quotes
            # and control characters, so the filter text cannot terminate
            # the literal and inject script.
            js_keyword = json.dumps(search_keyword)
            # mark is undefined when clearing the view (`setHtml('')`). Maybe
            # set and template html with all the scripts, ... but no contents?
            self.doc_webview.runJavaScript('''
                if (typeof mark !== "undefined") {{
                    mark({});
                }}
            '''.format(js_keyword))

    def update_info(self):
        """Refresh the attributes shown by the labels in the Info box."""
        if self.corpus is not None:
            has_tokens = self.corpus.has_tokens()
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(
                self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = \
                sum(map(len, self.corpus.tokens)) if has_tokens else 'n/a'
            self.n_types = \
                len(self.corpus.dictionary) if has_tokens else 'n/a'
            self.is_preprocessed = has_tokens
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send listed documents to one output and the rest to the other.

        ``None`` is sent on both outputs when there is no corpus.
        """
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)
class OWCreateInstance(OWWidget):
    """Widget for interactively composing a single data instance.

    Every variable of the input data is shown as a row in a filterable
    table where its value can be edited.  The resulting one-row table is
    sent to the output, optionally appended to the input data together
    with a "Source ID" meta column.
    """
    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    ACTIONS = ["median", "mean", "random", "input"]
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    # Column indices by tag: Header.name == 0, Header.variable == 1
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        """Build the filter line, the variable table and the buttons."""
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(
            self.Header.variable, VariableDelegate(self)
        )
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        # The view shows the model through a proxy that filters on all
        # columns (key column -1), case-insensitively.
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(False)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            gui.button(
                box, self, name.capitalize(),
                # bind the action name now; *args absorbs the signal payload
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None, self, "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        """Apply the stripped filter text to the proxy model."""
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        """Commit whenever a value in the table changes."""
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the per-row context menu with the initialization actions."""
        index = self.view.indexAt(point)
        if not index.isValid():
            # The click was not on a row; an invalid index has no model to
            # map through, so there is nothing to offer.
            return
        model: QSortFilterProxyModel = index.model()
        source_index = model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """Create one action per entry of ``ACTIONS`` for the given row.

        ``index`` is an index of the source model.
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        """Set row values using the function named ``fun``.

        Parameters
        ----------
        fun : str
            One of ``ACTIONS``.  "input" takes the values from the
            reference data; the other names take them from the values the
            model stores for the variable.
        indices : list of QModelIndex, optional
            Source-model indices of the rows to set.  When omitted, the
            rows currently shown through the proxy model are set.

        Raises
        ------
        NotImplementedError
            For a variable that is neither continuous, discrete nor string.
        """
        cont_fun = {"median": np.nanmedian,
                    "mean": np.nanmean,
                    "random": cont_random,
                    "input": np.nanmean}.get(fun, NotImplemented)
        disc_fun = {"median": majority,
                    "mean": majority,
                    "random": disc_random,
                    "input": majority}.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        if indices is None:
            # Proxy rows differ from source rows while a filter (or sort)
            # is active, so map them before indexing the source model.
            # Sorting keeps the source order, which equals
            # range(rowCount) when nothing is filtered out.
            proxy = self.proxy_model
            rows = sorted(
                proxy.mapToSource(proxy.index(row, 0)).row()
                for row in range(proxy.rowCount())
            )
        else:
            rows = [index.row() for index in indices]

        # Commit once at the end instead of once per changed row.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if all(np.isnan(values)):
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            # Always restore the connection, even if a row failed, so that
            # later edits keep triggering commits.
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        """Handle new input data and send the output unconditionally."""
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Repopulate the model from the input data and stored values."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # the stored values are handed to the model once and then dropped
        self.values = {}
        # Size columns to their contents with stretching switched off, then
        # let the last section stretch again.
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        """Handle new reference data."""
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        """Show the sizes of the data and reference inputs in the status bar."""
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        """Show the size of the output (or "no output") in the status bar."""
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Create the instance, optionally append it to the data, send it."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Return a one-row table in the input domain with the table values.

        The row starts with missing values (empty strings for string metas)
        and is then filled with the values read from the model.
        """
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Return the input data followed by ``data``, tagged by source.

        A discrete "Source ID" meta column is added; it is 0 for the rows
        of the input data and 1 for the created row.
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        data.metas[:len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return the current values in the model, keyed by variable name."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        """Report the input/output domain and the values of the instance."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget."""
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current table values in the ``values`` setting."""
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWRank(OWWidget):
    """Rank the attributes of the input data by a set of scoring methods.

    The widget scores every attribute with the built-in measures from
    ``SCORES`` and with any scorer received on the "Scorer" input, shows
    the scores in a sortable table and outputs the data restricted to
    the selected attributes ("Reduced Data") together with a table of
    the computed scores ("Scores").
    """

    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]

    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Identifiers of the attribute selection methods (radio button ids).
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting({"Univariate Linear Regression",
                                    "RReliefF"})
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1

    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # Attributes the score check boxes are bound to (see `_score_vars`).
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = ["gain", "inf_gain", "gini", "anova", "chi2", "relief",
                   "fcbc", "ulr", "rrelief"]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """Build the control area and the three stacked score views."""
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [m for m in self.all_measures if
                             issubclass(DiscreteVariable, m.score.class_type)]
        self.contMeasures = [m for m in self.all_measures if
                             issubclass(ContinuousVariable, m.score.class_type)]

        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        # The first seven entries of `_score_vars` go to the classification
        # box, the last two to the regression box.
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            # `val=_score` binds the current measure to the callback;
            # a plain closure would see only the last loop value.
            check = gui.checkBox(
                box, self, var, label=_score.name,
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(
            self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        self.discRanksView = QTableView()
        self.ranksViewStack.addWidget(self.discRanksView)
        self.discRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.discRanksView.setSelectionMode(QTableView.MultiSelection)
        self.discRanksView.setSortingEnabled(True)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        self.discRanksModel = QStandardItemModel(self)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)

        self.discRanksProxyModel = MySortProxyModel(self)
        self.discRanksProxyModel.setSourceModel(self.discRanksModel)
        self.discRanksView.setModel(self.discRanksProxyModel)

        self.discRanksView.setColumnWidth(0, 20)
        self.discRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.discRanksView.pressed.connect(self.onSelectItem)
        self.discRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.discRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[0] is not None:
            self.discRanksView.horizontalHeader().restoreState(
                self.headerState[0])

        self.contRanksView = QTableView()
        self.ranksViewStack.addWidget(self.contRanksView)
        self.contRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.contRanksView.setSelectionMode(QTableView.MultiSelection)
        self.contRanksView.setSortingEnabled(True)

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        self.contRanksModel = QStandardItemModel(self)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)

        self.contRanksProxyModel = MySortProxyModel(self)
        self.contRanksProxyModel.setSourceModel(self.contRanksModel)
        self.contRanksView.setModel(self.contRanksProxyModel)

        self.contRanksView.setColumnWidth(0, 20)
        self.contRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.contRanksView.pressed.connect(self.onSelectItem)
        self.contRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.contRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[1] is not None:
            self.contRanksView.horizontalHeader().restoreState(
                self.headerState[1])

        self.noClassRanksView = QTableView()
        self.ranksViewStack.addWidget(self.noClassRanksView)
        self.noClassRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.noClassRanksView.setSelectionMode(QTableView.MultiSelection)
        self.noClassRanksView.setSortingEnabled(True)

        self.noClassRanksLabels = ["#"]
        self.noClassRanksModel = QStandardItemModel(self)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)

        self.noClassRanksProxyModel = MySortProxyModel(self)
        self.noClassRanksProxyModel.setSourceModel(self.noClassRanksModel)
        self.noClassRanksView.setModel(self.noClassRanksProxyModel)

        self.noClassRanksView.setColumnWidth(0, 20)
        self.noClassRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.noClassRanksView.pressed.connect(self.onSelectItem)
        self.noClassRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.noClassRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[2] is not None:
            self.noClassRanksView.horizontalHeader().restoreState(
                self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def switchRanksMode(self, index):
        """
        Switch between discrete/continuous/no_class mode.

        `index` 0 selects the discrete-class view, 1 the continuous-class
        view and any other value the view used for data without a class.
        The current view/model/proxy/measures references, the check box
        states and the (emptied) score table are updated accordingly.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        elif index == 1:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        else:
            self.ranksView = self.noClassRanksView
            self.ranksModel = self.noClassRanksModel
            self.ranksProxyModel = self.noClassRanksProxyModel
            self.measures = []
            self.selected_checks = set()
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)

        shape = (len(self.measures) + len(self.learners), 0)
        self.measure_scores = table(shape, None)
        # Suppress score recomputation while the check boxes are being
        # synchronised (see `measuresSelectionChanged`).
        self.update_scores = False
        # The loop variable is deliberately not called `score`: that name
        # is the scoring module referenced by the class-level `inputs`.
        for check, scorer in zip(self.score_checks, SCORES):
            check.setChecked(scorer.name in self.selected_checks)
        self.update_scores = True
        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """Handler of the "Data" input: reset the widget and rescore.

        With `data` set, the rank mode is chosen from the class variable,
        the first model column is filled (number of values for discrete
        attributes, "C" otherwise) and all scores are recomputed.  With
        `data` equal to None, None is sent on the "Scores" output.
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is not None:
            domain = self.data.domain
            attrs = domain.attributes
            self.usefulAttributes = [attr for attr in attrs
                                     if attr.is_discrete or attr.is_continuous]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [m for m in self.measures
                                 if m.score.supports_sparse_data]

            self.ranksModel.setRowCount(len(attrs))
            for i, a in enumerate(attrs):
                if a.is_discrete:
                    v = len(a.values)
                else:
                    v = "C"
                item = ScoreValueItem()
                item.setData(v, Qt.DisplayRole)
                self.ranksModel.setItem(i, 0, item)
                item = QStandardItem(a.name)
                item.setData(gui.attributeIconDict[a], Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(i, item)

            shape = (len(self.measures) + len(self.learners), len(attrs))
            self.measure_scores = table(shape, None)
            self.updateScores()
        else:
            self.send("Scores", None)

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers of the view's current selection."""
        selection = self.ranksView.selectionModel().selection()
        return list(set(ind.row() for ind in selection.indexes()))

    def set_learner(self, learner, lid=None):
        """Handler of the (multiple) "Scorer" input.

        A `learner` of None with a given `lid` removes the scorer stored
        under that id; a non-None `learner` is stored (or replaced) under
        `lid`.  Afterwards the rows of `measure_scores` that belong to
        external scorers are reset and recomputed.
        """
        if learner is None and lid is not None:
            # None may arrive for an id that never delivered a scorer;
            # `pop` with a default avoids the KeyError `del` would raise.
            self.learners.pop(lid, None)
        elif learner is not None:
            self.learners[lid] = score_meta(
                learner.name,
                learner.name,
                learner
            )
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the rows of the built-in measures, replace the learner rows.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        measures_mask = [False] * len(self.measures)
        measures_mask += [True for _ in self.learners]
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating what measures should be recomputed.  Its entries
        correspond to the built-in measures followed by the external
        scorers.  Nothing is done when there is no data.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + list(self.learners.values())
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [self.selectedMeasures.get(m.name)
                            for m in self.measures]
            # Learner entries use the scorer name as the mask value
            # (truthy when the name is non-empty).
            measuresMask = measuresMask + [v.name for v in
                                           self.learners.values()]

        data = self.data
        learner_col = len(self.measures)
        # Learner labels are rebuilt when the mask has no learner entry
        # or when its first learner entry is set.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            n_measure_update = len([x for x in measuresMask if x is not False])
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring all attributes at once failed; score
                        # them one by one and store None for those the
                        # estimator rejects.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(
                                self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # A scorer may yield several rows; each gets its own
                    # label and its own row in `measure_scores`.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
            self.contRanksModel.setHorizontalHeaderLabels(
                self.contRanksLabels + self.labels
            )
            self.discRanksModel.setHorizontalHeaderLabels(
                self.discRanksLabels + self.labels
            )
            self.noClassRanksModel.setHorizontalHeaderLabels(
                self.noClassRanksLabels + self.labels
            )
            self.updateRankModel(measuresMask)
            self.ranksProxyModel.invalidate()
            self.selectMethodChanged()

            self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Update the rankModel.

        Writes the scores of the masked measures into the model (display
        role) and sets each value's bar ratio relative to the minimum and
        maximum of its column.  `measuresMask` is extended in place with
        copies of its last entry when it is shorter than
        `measure_scores`.
        """
        values = []
        diff = len(self.measure_scores) - len(measuresMask)
        if len(measuresMask):
            measuresMask += [measuresMask[-1]] * diff
        # Drop model columns beyond those backed by `measure_scores`.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        for i, (scores, m) in enumerate(zip(self.measure_scores,
                                            measuresMask)):
            # Unmasked columns that already have items are left as is.
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        for i, (vals, m) in enumerate(zip(values, measuresMask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # `or 1` avoids division by zero when all valid
                        # values are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the data and remove all rows from the current model."""
        self.data = None
        self.usefulAttributes = []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Called when the user selects/unselects an item in the table view.

        Switches the selection method to manual and commits.
        """
        self.selectMethod = OWRank.SelectManual  # Manual
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Set the selection method (radio button id) if it changed."""
        if self.selectMethod != method:
            self.selectMethod = method
            self.selectButtons.button(method).setChecked(True)
            self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-apply the automatic selection and focus the current view."""
        self.autoSelection()
        self.ranksView.setFocus()

    def nSelectedChanged(self):
        """Callback of the spin box: switch to the "best ranked" method."""
        self.selectMethod = OWRank.SelectNBest
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """Select rows in the current view according to `selectMethod`.

        For the manual method the rows stored in `selected_rows` are
        selected.
        """
        selModel = self.ranksView.selectionModel()
        rowCount = self.ranksModel.rowCount()
        columnCount = self.ranksModel.columnCount()
        model = self.ranksProxyModel

        if self.selectMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0),
                model.index(rowCount - 1, columnCount - 1)
            )
        elif self.selectMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0),
                model.index(nSelected - 1, columnCount - 1)
            )
        else:
            selection = QItemSelection()
            if len(self.selected_rows):
                selection = QItemSelection()
                for row in self.selected_rows:
                    selection.append(QItemSelectionRange(
                        model.index(row, 0),
                        model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """Handle a click on a horizontal header section.

        Reselects the best ranked rows if that method is active and a
        score column was clicked, then stores the header state of all
        three views in the `headerState` setting.
        """
        if index >= 1 and self.selectMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        disc = bytes(self.discRanksView.horizontalHeader().saveState())
        cont = bytes(self.contRanksView.horizontalHeader().saveState())
        no_class = bytes(self.noClassRanksView.horizontalHeader().saveState())
        self.headerState = [disc, cont, no_class]

    def measuresSelectionChanged(self, measure):
        """Measure selection has changed. Update column visibility.

        Toggles the stored state of `measure`, recomputes its scores if
        none were computed yet (and score updates are enabled) and
        refreshes the visible columns.
        """
        checked = self.selectedMeasures[measure.name]
        self.selectedMeasures[measure.name] = not checked
        if not checked:
            self.selected_checks.add(measure.name)
        elif measure.name in self.selected_checks:
            self.selected_checks.remove(measure.name)
        measures_mask = [False] * len(self.measures)
        measures_mask += [False for _ in self.learners]
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            index = self.measures.index(measure)
            if all(s is None for s in self.measure_scores[index]):
                measures_mask[index] = True
        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Update the visible columns of the scores view.

        If the current sort column becomes hidden the stored header state
        of the current mode is dropped; with no stored header state the
        view is sorted (descending) by the first selected measure.
        """
        for i, measure in enumerate(self.measures):
            shown = self.selectedMeasures.get(measure.name)
            self.ranksView.setColumnHidden(i + 1, not shown)
            self.ranksView.setColumnWidth(i + 1, 100)

        index = self.ranksView.horizontalHeader().sortIndicatorSection()
        if self.ranksView.isColumnHidden(index):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is None:
            def get_sort_by_col(measures, selected_measures):
                # Column of the first selected measure, or the first
                # column after the built-in measures if none is selected.
                cols = [i + 1 for i, m in enumerate(measures) if
                        m.name in selected_measures]
                return cols[0] if cols else len(measures) + 1

            col = get_sort_by_col(self.measures, self.selected_checks)
            self.ranksView.sortByColumn(col, Qt.DescendingOrder)
            self.autoSelection()

    def updateDelegates(self):
        """Install a `ColoredBarItemDelegate` on each of the three views."""
        self.contRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.discRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.noClassRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """Report the input domain, the ranks table and the output domain."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """Store the selection and send the "Reduced Data" output.

        None is sent when there is no data or no attribute is selected.
        If every attribute is selected the method switches to "All".
        """
        self.selected_rows = self.get_selection()
        if self.data and len(self.data.domain.attributes) == len(
                self.selected_rows):
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)
        selected = self.selectedAttrs()
        if not self.data or not selected:
            self.send("Reduced Data", None)
            self.out_domain_desc = None
        else:
            data = Table(Domain(selected, self.data.domain.class_var,
                                self.data.domain.metas), self.data)
            self.send("Reduced Data", data)
            self.out_domain_desc = report.describe_domain(data.domain)

    def selectedAttrs(self):
        """Return the attributes of the selected rows (empty without data).

        Selected view rows are mapped through the sort proxy to source
        rows, which index into `data.domain.attributes`.
        """
        if self.data:
            inds = self.ranksView.selectionModel().selectedRows(0)
            source = self.ranksProxyModel.mapToSource
            inds = map(source, inds)
            inds = [ind.row() for ind in inds]
            return [self.data.domain.attributes[i] for i in inds]
        else:
            return []

    def create_scores_table(self, labels):
        """Build the table sent on the "Scores" output.

        Columns are the selected built-in measures followed by `labels`
        (the external scorer columns); the single meta column holds the
        attribute names.  Returns None when there is no column to show.
        """
        indices = [i for i, m in enumerate(self.measures)
                   if self.selectedMeasures.get(m.name, False)]
        measures = [s.name for s in self.measures if
                    self.selectedMeasures.get(s.name, False)]
        measures += [label for label in labels]
        if not measures:
            return None
        features = [ContinuousVariable(s) for s in measures]
        metas = [StringVariable("Feature name")]
        domain = Domain(features, metas=metas)

        # Rows of selected built-in measures plus all learner rows,
        # transposed so that each attribute becomes a table row.
        scores = np.array([row for i, row in enumerate(self.measure_scores)
                           if i in indices or i >= len(self.measures)]).T
        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Pad a stored two-element `headerState` tuple to three entries.

        Applied only to settings saved without a version.
        """
        if not version:
            # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0
            # headerState had length 2
            headerState = settings.get("headerState", None)
            if headerState is not None and \
                    isinstance(headerState, tuple) and \
                    len(headerState) < 3:
                headerState = (list(headerState) + [None] * 3)[:3]
                settings["headerState"] = headerState
class OWConcordance(OWWidget):
    """Show the occurrences of a query word together with their context.

    The widget takes a corpus and, optionally, a query word on input,
    lists the matches in a three-column table and outputs the documents
    of the selected rows on "Selected Documents".
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 30000

    inputs = [
        ('Corpus', Table, 'set_corpus'),
        ('Query Word', Topic, 'set_word_from_input'),
    ]
    outputs = [('Selected Documents', Table, )]

    autocommit = Setting(True)
    context_width = Setting(5)
    word = Setting("")
    # TODO Set selection settings.

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Build the info box, the query line edit and the table view."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_documents = ''   # Info on docs
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        # connect selectionChanged to self.commit(), which will be
        # updated by gui.auto_commit()
        # (the lambda looks `commit` up at call time, so the replaced
        # attribute is the one that gets called)
        self.conc_view.selectionModel().selectionChanged.connect(
            lambda: self.commit())
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred widget size."""
        return QSize(600, 400)

    def set_width(self):
        """Apply `context_width` to the model and restore the selection."""
        # Remember the selection before the model is changed and
        # re-apply it afterwards.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def set_corpus(self, data=None):
        """Handler of the "Corpus" input.

        Input that is not already a `Corpus` is converted with
        `Corpus.from_table`; None clears the corpus.
        """
        self.corpus = data
        if data is not None and not isinstance(data, Corpus):
            self.corpus = Corpus.from_table(data.domain, data)
        self.model.set_corpus(self.corpus)
        self.update_widget()
        self.commit()

    def set_word_from_input(self, topic):
        """Handler of the "Query Word" input.

        A non-empty `topic` disables the query line edit and uses
        `topic.metas[0, 0]` as the query word; a warning is shown when
        the topic has more than one row.  An empty or missing topic
        re-enables the line edit.
        """
        self.Warning.multiple_words_on_input.clear()
        have_word = topic is not None and len(topic) > 0
        self.input.setEnabled(not have_word)
        if have_word:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Pass the current query word to the model and refresh the view."""
        self.model.set_word(self.word)
        self.update_widget()

    def resize_columns(self):
        """Give columns 0 and 2 equal shares of the width left by column 1."""
        # `/` yields a float, while setColumnWidth takes an int; recent
        # Python versions no longer convert floats implicitly for
        # extension functions, so convert explicitly (truncating, as the
        # implicit conversion did).
        col_width = int((self.conc_view.width() -
                         self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the column layout in sync with the widget size."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view and refresh the values shown in the info box."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if self.corpus.has_tokens() else 'n/a'
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents of the selected rows (None if none selected).

        The first row of every selection range is mapped to a document
        index through the first element of `model.word_index[row]`.
        """
        rows = [sel_range.top() for sel_range
                in self.conc_view.selectionModel().selection()]
        selected_docs = sorted(set(self.model.word_index[row][0]
                                   for row in rows))
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.send("Selected Documents", selected)
        else:
            self.send("Selected Documents", None)