Пример #1
0
def _run(
    data: Table,
    group_by_attrs: List[Variable],
    aggregations: Dict[Variable, Set[str]],
    result: Result,
    state: TaskState,
) -> Result:
    """
    Group the rows of `data` and compute the requested aggregations.

    Parameters
    ----------
    data
        Table whose rows are grouped.
    group_by_attrs
        Variables passed to `data.groupby`.
    aggregations
        For each variable, the names of the aggregations to compute. The
        names are keys of `AGGREGATIONS` and are ordered by their position
        in `AGGREGATIONS_ORD`.
    result
        Result holder; a grouping already stored in `result.group_by` is
        reused instead of grouping again.
    state
        State used to report status, progress and the partial result and
        to check for interruption.

    Returns
    -------
    The same `result` object with `group_by` and `result_table` set.
    """
    def report(fraction):
        # the received value is scaled by 100 before it is reported
        state.set_progress_value(fraction * 100)
        if state.is_interruption_requested():
            raise Exception

    state.set_status("Aggregating")
    # rows are grouped only when no grouping is stored yet
    if result.group_by is None:
        result.group_by = data.groupby(group_by_attrs)
    state.set_partial_result(result)

    # pair every aggregation name with its function, in display order
    named_functions = {}
    for var, agg_names in aggregations.items():
        ordered_names = sorted(agg_names, key=AGGREGATIONS_ORD.index)
        named_functions[var] = [
            (name, AGGREGATIONS[name].function) for name in ordered_names
        ]

    result.result_table = result.group_by.aggregate(
        named_functions, wrap_callback(report, 0.2, 1))
    return result
Пример #2
0
def add_task_to_dispose_queue(task: TaskState):
    """
    Hand `task` over to the application object and schedule its deletion.

    The task must not have a parent. It is re-parented to the
    `QApplication` instance and `task.deleteLater` is connected to the
    `finished` signal of the task's watcher, so the task is deleted after
    completion. All other signals of the task should be disconnected.
    """
    assert task.parent() is None
    task.setParent(QApplication.instance())
    task.watcher.finished.connect(task.deleteLater)
Пример #3
0
def worker(data: Table, learner, state: TaskState):
    """
    Fit a sequence of models, dropping one attribute at every step.

    The first model uses all attributes of `data`. Each following step
    takes the attributes of the last model in the trace, fits a model for
    every combination with one attribute left out and keeps the one with
    the highest `log2p`. The loop ends when the trace holds as many results
    as `data` has attributes.

    Parameters
    ----------
    data
        Table with the attributes and the survival endpoints.
    learner
        Callable that fits a model on a table.
    state
        State used to report progress.

    Returns
    -------
    List of `Result` objects, one per step.
    """
    # No need to check for irregularities, this is done in widget
    time_var, event_var = get_survival_endpoints(data.domain)
    endpoint_names = [time_var.name, event_var.name]

    def fit_cox_models(attrs_combinations):
        fitted = []
        for attrs in attrs_combinations:
            model = learner(data[:, attrs + endpoint_names])
            fitted.append(Result(model.ll_ratio_log2p(), model))
        return fitted

    all_attributes = list(data.domain.attributes)
    progress_steps = iter(np.linspace(0, 100, len(all_attributes)))
    _trace = fit_cox_models([all_attributes])
    while len(_trace) != len(data.domain.attributes):
        remaining = list(_trace[-1].model.domain.attributes)

        if len(remaining) > 1:
            subset_size = len(remaining) - 1
            candidates = [
                list(subset)
                for subset in itertools.combinations(remaining, subset_size)
            ]
        else:
            candidates = [remaining]

        best = max(fit_cox_models(candidates), key=lambda res: res.log2p)
        _trace.append(best)
        state.set_progress_value(next(progress_steps))
    return _trace
Пример #4
0
def run(corpus: Corpus, statistics: Tuple[int, str], state: TaskState) -> None:
    """
    Compute the selected statistics (new features) on the corpus.

    Nothing is returned; every statistic is reported as a partial result
    as soon as it is computed.

    Parameters
    ----------
    corpus
        The corpus on which the computation is held.
    statistics
        Tuple of statistic pairs to be computed:
        (statistics id, string pattern)
    state
        State used to report progress and partial results.
    """
    # the callback is called once per corpus element for every statistic,
    # so the progress ticks are spread over len(corpus) * len(statistics)
    n_ticks = len(corpus) * len(statistics)
    tick_values = iter(np.linspace(0, 100, n_ticks))

    def advance():
        state.set_progress_value(next(tick_values))

    for stat_id, pattern in statistics:
        compute = STATISTICS_FUNCTIONS[stat_id]
        outcome = compute(corpus, pattern, advance)
        if outcome is not None:
            # append the compute value built from the function and pattern
            outcome = outcome + (ComputeValue(compute, pattern),)
        state.set_partial_result((stat_id, pattern, outcome))
Пример #5
0
def _prepare_dir_and_save_images(paths_queue, dir_name, target_size,
                                 previously_saved, state: TaskState):
    """
    Prepare the target directory and save the queued images one by one.

    After every saved image the progress and the partial result are
    reported; the loop stops early when an interruption is requested.

    Parameters
    ----------
    paths_queue
        Queue of (source path, target path) pairs; pairs are taken from
        its left end.
    dir_name
        Directory that is cleaned when the process starts from scratch.
    target_size
        Passed on to the function that saves an image.
    previously_saved : int
        Number of saved images in the previous process. If the process is
        resumed it is non-zero.
    state
        State used to report progress and partial results and to check
        for interruption.

    Returns
    -------
    `Result` whose `paths` holds the pairs that were not processed.
    """
    res = Result(paths=paths_queue)
    if previously_saved == 0:
        _clean_dir(dir_name)

    steps = len(paths_queue) + previously_saved
    loader = ImageLoader()
    while res.paths:
        source, destination = res.paths.popleft()
        _save_an_image(loader, source, destination, target_size)

        remaining_share = len(res.paths) / steps
        state.set_progress_value((1 - remaining_share) * 100)
        state.set_partial_result(res)
        if state.is_interruption_requested():
            break

    return res
Пример #6
0
 def _load_corpus(path: str, data: Table, state: TaskState) -> Corpus:
     """
     Create a corpus from `data` when it is truthy, otherwise from `path`.

     A corpus read from a file is named after the file (base name without
     the extension). `None` is returned when neither `data` nor `path` is
     truthy.
     """
     state.set_status("Loading")
     if data:
         return Corpus.from_table(data.domain, data)
     if not path:
         return None
     corpus = Corpus.from_file(path)
     file_name = os.path.basename(path)
     corpus.name = os.path.splitext(file_name)[0]
     return corpus
Пример #7
0
def run_embedding(
    images: Table,
    file_paths_attr: Variable,
    embedder_name: str,
    state: TaskState,
) -> Result:
    """
    Embed the images with the selected embedder.

    When the embedder raises `EmbeddingConnectionError`, "squeezenet" is
    reported as a partial result and the embedding is repeated with the
    "squeezenet" embedder.

    Parameters
    ----------
    images
        Data table with images to embed.
    file_paths_attr
        The column of the table with images.
    embedder_name
        The name of selected embedder.
    state
        State object used for controlling and progress.

    Returns
    -------
    The object that holds embedded images, skipped images, and number
    of skipped images.
    """
    embedder = ImageEmbedder(model=embedder_name)

    # progress is spread over the paths that are not unknown
    paths = images[:, file_paths_attr].metas.flatten()
    is_unknown = paths == file_paths_attr.Unknown
    n_valid = paths[~is_unknown].size

    # init progress bar and function
    ticks = iter(np.linspace(0.0, 100.0, n_valid))

    def advance(success=True):
        # `embedder` and `ticks` are looked up at call time, so the
        # callback follows them when they are replaced below
        if state.is_interruption_requested():
            embedder.set_canceled()
        if success:
            state.set_progress_value(next(ticks))

    def embed():
        return embedder(images, col=file_paths_attr, callback=advance)

    try:
        emb, skip, n_skip = embed()
    except EmbeddingConnectionError:
        # recompute ticks to go from current state to 100
        ticks = iter(np.linspace(next(ticks), 100.0, n_valid))

        state.set_partial_result("squeezenet")
        embedder = ImageEmbedder(model="squeezenet")
        emb, skip, n_skip = embed()

    return Result(embedding=emb, skip_images=skip, num_skipped=n_skip)
Пример #8
0
 def compute_secondary_clusters(embedding: Table, result: Result, state: TaskState):
     """
     Compute the secondary clusters table and report it as a partial result.

     The table is `None` when there are no cluster groups or no embedding.
     Otherwise it is computed by `cluster_additional_points` from the
     embedding, the third element of every cluster group and the
     "Clusters" variable of the clusters table.
     """
     if result.clusters.groups and embedding:
         state.set_status("Finding secondary clusters...")
         hulls = {
             key: group[2] for key, group in result.clusters.groups.items()
         }
         primary = result.clusters.table
         # falsy `primary` is passed on as is instead of a variable
         cluster_var = primary and primary.domain["Clusters"]
         secondary = cluster_additional_points(embedding, hulls, cluster_var)
     else:
         secondary = None
     result.clusters.secondary_table = secondary
     state.set_partial_result(("secondary_clusters", result))
Пример #9
0
def _run(
    corpus: Corpus,
    words: List[str],
    scoring_methods: List[str],
    aggregation: str,
    additional_params: dict,
    state: TaskState,
) -> None:
    """
    Score documents against words with every selected scoring method.

    The scores of each method are aggregated per document and reported as
    a partial result `(method, aggregation, scores)`.

    Parameters
    ----------
    corpus
        Corpus of documents
    words
        List of words used for scoring
    scoring_methods
        Methods to score documents with
    aggregation
        Aggregation applied for each document on word scores
    additional_params
        Additional parameters for scores (e.g. embedding needs text language)
    state
        TaskState for reporting the task status and giving partial results

    Raises
    ------
    Exception
        When no words are left after preprocessing, or when an interruption
        is requested during a progress callback.
    """
    def callback(fraction: float) -> None:
        state.set_progress_value(fraction * 100)
        if state.is_interruption_requested():
            raise Exception

    # the progress is split evenly between preprocessing and the methods
    cb_part = 1 / (len(scoring_methods) + 1)  # +1 for preprocessing

    preprocess_cb = wrap_callback(callback, end=cb_part)
    words = _preprocess_words(corpus, words, preprocess_cb)
    if len(words) == 0:
        raise Exception(
            "Empty word list after preprocessing. Please provide a valid set of words."
        )

    # part 0 belongs to preprocessing, so the methods are numbered from 1
    for part, method_name in enumerate(scoring_methods, start=1):
        scoring_method = SCORING_METHODS[method_name][1]
        accepted = signature(scoring_method).parameters
        # pass only the additional parameters the method accepts
        add_params = {
            key: value
            for key, value in additional_params.items() if key in accepted
        }
        method_cb = wrap_callback(callback,
                                  start=part * cb_part,
                                  end=(part + 1) * cb_part)
        scores = scoring_method(corpus, words, method_cb, **add_params)
        scores = AGGREGATIONS[aggregation](scores, axis=1)
        state.set_partial_result((method_name, aggregation, scores))
Пример #10
0
def run_gene_matcher(gene_matcher: GeneMatcher, state: TaskState):
    """
    Run gene matching and report its progress through `state`.

    A counting callback is installed as `gene_matcher._progress_callback`
    before `gene_matcher.match_genes()` is called. Every call of the
    callback reports the share of calls made so far, in percent of the
    number of genes.
    """
    total = len(gene_matcher.genes)
    done = 0

    def callback():
        nonlocal done
        done += 1
        state.set_progress_value(100 * (done / total))

    state.set_status("Working ...")
    gene_matcher._progress_callback = callback
    gene_matcher.match_genes()
Пример #11
0
def run(gene_sets: GeneSets, selected_gene_sets: List[Tuple[str, ...]], genes,
        state: TaskState) -> Results:
    """
    Build the table rows for the gene sets that contain any of `genes`.

    Parameters
    ----------
    gene_sets
        Gene sets to go through; they are processed in sorted order.
    selected_gene_sets
        Hierarchies of the gene sets to consider; gene sets with any other
        hierarchy are skipped.
    genes
        Genes that are intersected with the genes of every gene set.
    state
        State used to report status and progress and to check for
        interruption.

    Returns
    -------
    `Results` whose `items` holds one row of items (count, genes, category,
    term) per gene set with at least one matched gene. `items` is left as
    created by `Results()` when `genes` is empty or when the computation
    is interrupted.
    """
    results = Results()
    items = []
    step, steps = 0, len(gene_sets)
    # Report progress about ten times per run. The interval is a whole
    # number of steps: a float interval (steps / 10) makes the modulo test
    # miss most reports when `steps` is not a multiple of 10.
    report_every = max(1, steps // 10)

    if not genes:
        return results

    state.set_status('Calculating...')

    for gene_set in sorted(gene_sets):

        step += 1
        if step % report_every == 0 or step == steps:
            state.set_progress_value(100 * step / steps)

        if gene_set.hierarchy not in selected_gene_sets:
            continue

        if state.is_interruption_requested():
            return results

        matched_set = gene_set.genes & genes
        if len(matched_set) > 0:
            category_column = QStandardItem()
            term_column = QStandardItem()
            count_column = QStandardItem()
            genes_column = QStandardItem()

            category_column.setData(", ".join(gene_set.hierarchy),
                                    Qt.DisplayRole)
            term_column.setData(gene_set.name, Qt.DisplayRole)
            term_column.setData(gene_set.name, Qt.ToolTipRole)

            # there were cases when the link was a non-empty string that
            # was not a valid link (e.g. "_"), hence the scheme check
            if gene_set.link and urlparse(gene_set.link).scheme:
                term_column.setData(gene_set.link, LinkRole)
                term_column.setForeground(QColor(Qt.blue))

            count_column.setData(matched_set, Qt.UserRole)
            count_column.setData(len(matched_set), Qt.DisplayRole)

            genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
            # store the genes to put them on the output on selection
            genes_column.setData(set(gene_set.genes), Qt.UserRole)

            items.append(
                [count_column, genes_column, category_column, term_column])

    results.items = items
    return results
Пример #12
0
 def compute_clusters(embedding: Table, result: Result, state: TaskState):
     """
     Compute the clusters and report them as a partial result.

     Without a scores table or an embedding the clusters table and groups
     are set to `None`. Otherwise `annotate_projection` is called (with
     `eps` when an epsilon is already stored) and its first three elements
     are stored as the clusters table, groups and epsilon.
     """
     if result.scores.table and embedding:
         state.set_status("Finding clusters...")
         epsilon = result.clusters.epsilon
         kwargs = {} if epsilon is None else {"eps": epsilon}
         annotated = annotate_projection(result.scores.table, embedding,
                                         **kwargs)
         result.clusters.table = annotated[0]
         result.clusters.groups = annotated[1]
         result.clusters.epsilon = annotated[2]
     else:
         result.clusters.table = None
         result.clusters.groups = None
     state.set_partial_result(("clusters", result))
Пример #13
0
def count_words(data: Corpus, state: TaskState) -> Tuple[Counter, bool]:
    """
    This function implements counting process of the word cloud widget and
    is called in the separate thread by concurrent.

    Parameters
    ----------
    data
        Corpus with the data
    state
        State used to report status and progress.

    Returns
    -------
    Reports counts as a counter and boolean that tells whether the data were
    retrieved on bag of words basis.
    """
    state.set_status("Calculating...")
    state.set_progress_value(0)
    bow_counts = _bow_words(data)
    # progress is reported in percent (0-100), so half way is 50, not 0.5
    state.set_progress_value(50)
    if bow_counts:
        corpus_counter = Counter(bow_counts)
    else:
        # no bag of words counts: count the ngrams of all documents
        corpus_counter = Counter(w for doc in data.ngrams for w in doc)
    state.set_progress_value(100)
    return corpus_counter, bool(bow_counts)
Пример #14
0
    def run(data: Orange.data.Table, metric: distance, normalized_dist: bool,
            axis: int, state: TaskState) -> Orange.misc.DistMatrix:
        """
        Compute distances on `data` with `metric`.

        `None` is returned when there is no data. The metric is called with
        `axis=1 - axis`, imputation enabled and a progress callback;
        `normalize=True` is added when normalization is requested and the
        metric supports it.

        Raises
        ------
        InterruptException
            From the progress callback when an interruption is requested.
        """
        if data is None:
            return None

        def report_progress(value: float) -> None:
            state.set_progress_value(value)
            if state.is_interruption_requested():
                raise InterruptException

        state.set_status("Calculating...")
        kwargs = dict(axis=1 - axis, impute=True, callback=report_progress)
        if metric.supports_normalization and normalized_dist:
            kwargs["normalize"] = True
        return metric(data, **kwargs)
Пример #15
0
def run_vizrank(compute_score: Callable, states: Iterator,
                scores: List, task: TaskState):
    """
    Score the states one after another and report the growing ranking.

    Parameters
    ----------
    compute_score
        Callable that returns the score of a state, or `None` when the
        state has no score.
    states
        Iterator over the states to score.
    scores
        Scores computed so far; the list is copied, not modified. It is
        presumably sorted, since positions are found with `bisect_left`.
    task
        State used to report partial results and to check for
        interruption.

    Returns
    -------
    `Result` with the queue of new scores and a copy of all scores.
    """
    res = Result(queue=Queue(), scores=None)
    ranked = scores.copy()

    def evaluate(current, following):
        try:
            value = compute_score(current)
            if value is not None:
                index = bisect_left(ranked, value)
                entry = QueuedScore(position=index, score=value,
                                    state=current, next_state=following)
                res.queue.put_nowait(entry)
                ranked.insert(index, value)
        except Exception:  # ignore current state in case of any problem
            pass
        res.scores = ranked.copy()
        task.set_partial_result(res)

    # one state is always fetched ahead, so that every queued score also
    # carries the state that follows it
    upcoming = next(states)
    while True:
        if task.is_interruption_requested():
            return res
        current = copy.copy(upcoming)
        try:
            upcoming = copy.copy(next(states))
        except StopIteration:
            # the iterator is exhausted: `current` is the last state
            evaluate(current, None)
            return res
        evaluate(current, upcoming)
Пример #16
0
def run_vizrank(compute_score: Callable, states: Iterator, scores: List,
                task: TaskState):
    """
    Score the states one after another and report the growing ranking.

    Parameters
    ----------
    compute_score
        Callable that returns the score of a state, or `None` when the
        state has no score.
    states
        Iterator over the states to score.
    scores
        Scores computed so far; the list is copied, not modified. It is
        presumably sorted, since positions are found with `bisect_left`.
    task
        State used to report partial results and to check for
        interruption.

    Returns
    -------
    `Result` with the queue of new scores and a copy of all scores.
    """
    res = Result(queue=Queue(), scores=None)
    ranked = scores.copy()

    def evaluate(current, following):
        try:
            value = compute_score(current)
            if value is not None:
                index = bisect_left(ranked, value)
                entry = QueuedScore(position=index, score=value,
                                    state=current, next_state=following)
                res.queue.put_nowait(entry)
                ranked.insert(index, value)
        except Exception:  # ignore current state in case of any problem
            pass
        res.scores = ranked.copy()
        task.set_partial_result(res)

    # one state is always fetched ahead, so that every queued score also
    # carries the state that follows it
    upcoming = next(states)
    while True:
        if task.is_interruption_requested():
            return res
        current = copy.copy(upcoming)
        try:
            upcoming = copy.copy(next(states))
        except StopIteration:
            # the iterator is exhausted: `current` is the last state
            evaluate(current, None)
            return res
        evaluate(current, upcoming)
Пример #17
0
def run_download_task(gds_id: str, samples: DefaultDict[str, list],
                      transpose: bool, state: TaskState):
    """
    Download a dataset and report the download progress through `state`.

    Parameters
    ----------
    gds_id
        Identifier passed to `dataset_download`.
    samples
        Samples passed to `dataset_download`.
    transpose
        Passed to `dataset_download` as the `transpose` keyword.
    state
        State used to report status and progress.

    Returns
    -------
    `Result` with the downloaded dataset stored in `gds_dataset`.
    """
    res = Result()
    # presumably the number of times `dataset_download` calls the
    # callback -- TODO confirm
    expected_calls = 102
    calls_made = 0

    def callback():
        nonlocal calls_made
        calls_made += 1
        state.set_progress_value(100 * (calls_made / expected_calls))

    state.set_status("Downloading...")
    dataset = dataset_download(gds_id, samples, transpose=transpose,
                               callback=callback)
    res.gds_dataset = dataset
    return res
Пример #18
0
def run_mds(matrix: DistMatrix, max_iter: int, step_size: int, init_type: int,
            embedding: np.ndarray, state: TaskState):
    """
    Run MDS in chunks of iterations and report the embedding after each.

    Parameters
    ----------
    matrix
        Distance matrix the MDS is fitted on.
    max_iter
        Total number of iterations allowed.
    step_size
        Number of iterations done by one MDS fit; the last chunk is
        shortened so that `max_iter` is not exceeded.
    init_type
        Passed to `MDS` as `init_type`.
    embedding
        Initial embedding; passed to the first fit as `init_data`.
    state
        State used to report status, progress and partial results and to
        check for interruption.

    Returns
    -------
    `Result` with the last computed embedding. The loop stops when
    `max_iter` iterations are done, the stress is zero, the stress improves
    by less than the `eps` parameter of MDS, or an interruption is
    requested.
    """
    res = Result(embedding=embedding)

    iterations_done = 0
    init = embedding
    state.set_status("Running...")
    # `np.float` was removed from NumPy; the builtin `float` is the same type
    oldstress = np.finfo(float).max

    while True:
        step_iter = min(max_iter - iterations_done, step_size)
        mds = MDS(
            dissimilarity="precomputed", n_components=2,
            n_init=1, max_iter=step_iter,
            init_type=init_type, init_data=init
        )

        mdsfit = mds(matrix)
        iterations_done += step_iter

        embedding, stress = mdsfit.embedding_, mdsfit.stress_
        # normalize the stress by the summed norms of the embedded points
        emb_norm = np.sqrt(np.sum(embedding ** 2, axis=1)).sum()
        if emb_norm > 0:
            stress /= emb_norm

        res.embedding = embedding
        state.set_partial_result(res)
        state.set_progress_value(100 * iterations_done / max_iter)
        if iterations_done >= max_iter or stress == 0 or \
                (oldstress - stress) < mds.params["eps"]:
            return res
        # continue the next chunk from the current embedding
        init = embedding
        oldstress = stress
        if state.is_interruption_requested():
            return res
Пример #19
0
def run_mds(matrix: DistMatrix, max_iter: int, step_size: int, init_type: int,
            embedding: np.ndarray, state: TaskState):
    """
    Run MDS in chunks of iterations and report the embedding after each.

    Parameters
    ----------
    matrix
        Distance matrix the MDS is fitted on.
    max_iter
        Total number of iterations allowed.
    step_size
        Number of iterations done by one MDS fit; the last chunk is
        shortened so that `max_iter` is not exceeded.
    init_type
        Passed to `MDS` as `init_type`.
    embedding
        Initial embedding; passed to the first fit as `init_data`.
    state
        State used to report status, progress and partial results and to
        check for interruption.

    Returns
    -------
    `Result` with the last computed embedding. The loop stops when
    `max_iter` iterations are done, the stress is zero, the stress improves
    by less than the `eps` parameter of MDS, or an interruption is
    requested.
    """
    res = Result(embedding=embedding)

    iterations_done = 0
    init = embedding
    state.set_status("Running...")
    # `np.float` was removed from NumPy; the builtin `float` is the same type
    oldstress = np.finfo(float).max

    while True:
        step_iter = min(max_iter - iterations_done, step_size)
        mds = MDS(
            dissimilarity="precomputed", n_components=2,
            n_init=1, max_iter=step_iter,
            init_type=init_type, init_data=init
        )

        mdsfit = mds(matrix)
        iterations_done += step_iter

        embedding, stress = mdsfit.embedding_, mdsfit.stress_
        # normalize the stress by the summed norms of the embedded points
        emb_norm = np.sqrt(np.sum(embedding ** 2, axis=1)).sum()
        if emb_norm > 0:
            stress /= emb_norm

        res.embedding = embedding
        state.set_partial_result(res)
        state.set_progress_value(100 * iterations_done / max_iter)
        if iterations_done >= max_iter or stress == 0 or \
                (oldstress - stress) < mds.params["eps"]:
            return res
        # continue the next chunk from the current embedding
        init = embedding
        oldstress = stress
        if state.is_interruption_requested():
            return res
def worker(table: Table, covariates: List, time_var: str, event_var: str,
           state: TaskState):
    """
    Process the covariates in a process pool and correct the p-values.

    The table is converted to a data frame (without metas) and split into
    batches that all contain the time and event columns. With more than 50
    covariates there is one batch per CPU, otherwise one batch per
    covariate. The batches are handled by `batch_to_process` in a pool.

    Parameters
    ----------
    table
        Table with the covariates and the time and event columns.
    covariates
        Names of the covariate columns.
    time_var
        Name of the time column.
    event_var
        Name of the event column; it is cast to float64.
    state
        State used to report progress.

    Returns
    -------
    Tuple of the covariate names (first column of the stacked results) and
    a float array of the remaining columns with the FDR corrected values of
    the last column appended.
    """
    endpoints = [time_var, event_var]
    with multiprocessing.Manager() as manager:
        done_queue = manager.Queue()
        n_workers = cpu_count()

        frame = table_to_frame(table, include_metas=False)
        frame = frame.astype({event_var: np.float64})
        if len(covariates) > 50:
            column_groups = [covariates[i::n_workers]
                             for i in range(n_workers)]
        else:
            column_groups = [[cov] for cov in covariates]
        batches = (frame[endpoints + group] for group in column_groups)

        with multiprocessing.Pool(processes=n_workers) as pool:
            async_result = pool.map_async(
                partial(batch_to_process, done_queue, time_var, event_var),
                batches,
            )

            # presumably the workers put one item on the queue per
            # processed covariate -- TODO confirm; progress stops being
            # updated when nothing arrives for 3 seconds
            for progress in np.linspace(0, 100, len(covariates)):
                state.set_progress_value(progress)
                try:
                    done_queue.get(timeout=3)
                except queue.Empty:
                    break

            stacked = np.vstack(async_result.get())
            covariate_names = stacked[:, 0]
            stats = stacked[:, 1:].astype(float)
            _, corrected = fdrcorrection(stats[:, -1], is_sorted=False)
            corrected_column = corrected.reshape(corrected.shape[0], -1)
            return covariate_names, np.hstack((stats, corrected_column))
Пример #21
0
def run(data: Table, desc, use_values, task: TaskState) -> Result:
    """
    Construct new variables and transform `data` into the extended domain.

    Primitive new variables are appended to the attributes and the others
    to the metas of the domain of `data`. Exception masking of the compute
    values is switched off during the transformation and switched back on
    afterwards.

    Returns
    -------
    `Result` built from the transformed data, the new attributes and the
    new metas.

    Raises
    ------
    CancelledError
        When an interruption is requested before or right after the
        variables are constructed.
    """
    if task.is_interruption_requested():
        raise CancelledError  # pragma: no cover
    new_variables = construct_variables(desc, data, use_values)
    # Explicit cancellation point after `construct_variables` which can
    # already run `compute_value`.
    if task.is_interruption_requested():
        raise CancelledError  # pragma: no cover

    attrs, metas = [], []
    for var in new_variables:
        (attrs if var.is_primitive() else metas).append(var)

    domain = data.domain
    new_domain = Orange.data.Domain(domain.attributes + tuple(attrs),
                                    domain.class_vars,
                                    metas=domain.metas + tuple(metas))

    def set_masking(enabled):
        for variable in new_variables:
            variable.compute_value.mask_exceptions = enabled

    try:
        set_masking(False)
        transformed = data.transform(new_domain)
    finally:
        set_masking(True)
    return Result(transformed, attrs, metas)
Пример #22
0
    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation.

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        The function is started on the widget's executor with a progress
        callback bound to its `callback` argument. The outputs are
        invalidated, the progress bar is initialized and the widget is
        marked as running.

        Parameters
        ----------
        testfunc : Callable[[Callable[[float], None]], Results]
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = TaskState()

        def progress_callback(finished):
            # Doubles as the cancellation point of the running function;
            # `finished` is scaled by 100 before it is reported.
            if task.is_interruption_requested():
                raise UserInterrupt()
            task.set_progress_value(100 * finished)

        testfunc = partial(testfunc, callback=progress_callback)
        task.start(self.__executor, testfunc)

        # forward progress to the widget and get notified on completion
        task.progress_changed.connect(self.setProgressValue)
        task.watcher.finished.connect(self.__task_complete)

        self.Outputs.evaluations_results.invalidate()
        self.Outputs.predictions.invalidate()
        self.progressBarInit()
        self.setStatusMessage("Running")

        # remember the running task
        self.__state = State.Running
        self.__task = task
Пример #23
0
 def run(selected_data_transformed: Table, data: Table, result: Result,
         state: TaskState) -> None:
     """
     Fill `result` with the words, their p-values and their FDR values.

     The words are the names of the attributes of the selected data. The
     p-values are computed by `hypergeom_p_values` from the `X` of `data`
     and of the selected data, and the FDR values by `FDR` from the
     p-values. The status is updated before every stage.
     """
     selected = selected_data_transformed

     state.set_status("Listing words")
     result.words = [attribute.name
                     for attribute in selected.domain.attributes]

     state.set_status("Computing p-values")
     p_values = hypergeom_p_values(data.X, selected.X,
                                   callback=state.set_progress_value)
     result.p_values = p_values

     state.set_status("Computing FDR values")
     result.fdr_values = FDR(result.p_values)
Пример #24
0
def run(data: Table, embedding: Optional[np.ndarray], state: TaskState):
    """
    Simulate a long computation of a two-column embedding.

    In every step the embedding is recomputed several times from the row
    means of `data.X` and random values; then progress and the partial
    result are reported. The loop stops early when an interruption is
    requested.

    Returns
    -------
    `Result` with the last reported embedding (the initial `embedding`
    when interrupted in the first step).
    """
    res = Result(embedding=embedding)

    # simulate wasteful calculation (increase 'steps')
    steps = 10
    # NOTE(review): float interval; an exact zero remainder is only
    # guaranteed while `steps` is a multiple of 10
    report_every = steps / 10
    state.set_status("Calculating...")

    def make_embedding():
        x_data = np.array(np.mean(data.X, axis=1))
        if x_data.ndim == 2:
            x_data = x_data.ravel()
        y_data = np.random.rand(len(x_data))
        return np.vstack((x_data, y_data)).T

    for step in range(1, steps + 1):
        for _ in range(steps):
            embedding = make_embedding()
        if step % report_every == 0:
            state.set_progress_value(100 * step / steps)

        if state.is_interruption_requested():
            return res

        res.embedding = embedding
        state.set_partial_result(res)
    return res
Пример #25
0
def run(data: Table, embedding: Optional[np.ndarray], state: TaskState):
    """
    Simulate a long computation of a two-column embedding.

    In every step the embedding is recomputed several times from the row
    means of `data.X` and random values; then progress and the partial
    result are reported. The loop stops early when an interruption is
    requested.

    Returns
    -------
    `Result` with the last reported embedding (the initial `embedding`
    when interrupted in the first step).
    """
    res = Result(embedding=embedding)

    # simulate wasteful calculation (increase 'steps')
    steps = 10
    # NOTE(review): float interval; an exact zero remainder is only
    # guaranteed while `steps` is a multiple of 10
    report_every = steps / 10
    state.set_status("Calculating...")

    def make_embedding():
        x_data = np.array(np.mean(data.X, axis=1))
        if x_data.ndim == 2:
            x_data = x_data.ravel()
        y_data = np.random.rand(len(x_data))
        return np.vstack((x_data, y_data)).T

    for step in range(1, steps + 1):
        for _ in range(steps):
            embedding = make_embedding()
        if step % report_every == 0:
            state.set_progress_value(100 * step / steps)

        if state.is_interruption_requested():
            return res

        res.embedding = embedding
        state.set_partial_result(res)
    return res
Пример #26
0
def run_freeviz(data: Table, projector: FreeViz, state: TaskState):
    """
    Repeat the projection until the anchors stop changing.

    After every run of the projector the projection is reported as a
    partial result and the anchors become the projector's next `initial`.
    The loop ends when the anchors are close to the previous ones or when
    an interruption is requested.

    Returns
    -------
    `Result` with the projector and the last projection.
    """
    res = Result(projector=projector, projection=None)
    iteration, max_iterations = 0, MAX_ITERATIONS
    previous_anchors = res.projector.components_.T
    state.set_status("Calculating...")
    while True:
        # Needs a copy because projection should not be modified inplace.
        # If it is modified inplace, the widget and the thread hold a
        # reference to the same object. When the thread is interrupted it
        # is still modifying the object, but the widget receives it
        # (the modified object) with a delay.
        res.projection = res.projector(data).copy()
        current_anchors = res.projector.components_.T
        res.projector.initial = current_anchors

        state.set_partial_result(res)
        converged = np.allclose(previous_anchors, current_anchors,
                                rtol=1e-5, atol=1e-4)
        if converged:
            break
        previous_anchors = current_anchors

        iteration += 1
        state.set_progress_value(100 * iteration / max_iterations)
        if state.is_interruption_requested():
            break
    return res
Пример #27
0
def run_freeviz(data: Table, projector: FreeViz, state: TaskState):
    """
    Repeat the projection until the anchors stop changing.

    After every run of the projector the projection is reported as a
    partial result and the anchors become the projector's next `initial`.
    The loop ends when the anchors are close to the previous ones or when
    an interruption is requested.

    Returns
    -------
    `Result` with the projector and the last projection.
    """
    res = Result(projector=projector, projection=None)
    iteration, max_iterations = 0, MAX_ITERATIONS
    previous_anchors = res.projector.components_.T
    state.set_status("Calculating...")
    while True:
        # Needs a copy because projection should not be modified inplace.
        # If it is modified inplace, the widget and the thread hold a
        # reference to the same object. When the thread is interrupted it
        # is still modifying the object, but the widget receives it
        # (the modified object) with a delay.
        res.projection = res.projector(data).copy()
        current_anchors = res.projector.components_.T
        res.projector.initial = current_anchors

        state.set_partial_result(res)
        converged = np.allclose(previous_anchors, current_anchors,
                                rtol=1e-5, atol=1e-4)
        if converged:
            break
        previous_anchors = current_anchors

        iteration += 1
        state.set_progress_value(100 * iteration / max_iterations)
        if state.is_interruption_requested():
            break
    return res
Пример #28
0
def run(
    gene_sets: GeneSets, selected_gene_sets: List[Tuple[str, ...]], genes, state: TaskState, reference_genes=None
) -> Results:
    """
    Compute the enrichment of the selected gene sets and build table rows.

    Parameters
    ----------
    gene_sets
        Gene sets to go through; they are processed in sorted order.
    selected_gene_sets
        Hierarchies of the gene sets to consider; gene sets with any other
        hierarchy are skipped.
    genes
        Query genes; their intersection with the reference genes is
        passed to `set_enrichment`.
    state
        State used to report status and progress and to check for
        interruption.
    reference_genes
        Reference genes; an empty list is used when `None`.

    Returns
    -------
    `Results` whose `items` holds one row of items per gene set with a
    non-empty enrichment query. `items` is left as created by `Results()`
    when `genes` is empty or when the computation is interrupted.
    """
    results = Results()
    rows = []
    steps = len(gene_sets)

    if not genes:
        return results

    def new_item(*role_data):
        # create an item and set the given (value, role) pairs in order
        item = QStandardItem()
        for value, role in role_data:
            item.setData(value, role)
        return item

    state.set_status('Calculating...')
    if reference_genes is None:
        reference_genes = []

    for step, gene_set in enumerate(sorted(gene_sets), start=1):
        state.set_progress_value(100 * (step / steps))

        if gene_set.hierarchy not in selected_gene_sets:
            continue

        if state.is_interruption_requested():
            return results

        enrichment = gene_set.set_enrichment(reference_genes, genes.intersection(reference_genes))
        if len(enrichment.query) == 0:
            continue

        category_column = new_item((", ".join(gene_set.hierarchy), Qt.DisplayRole))

        term_column = new_item((gene_set.name, Qt.DisplayRole), (gene_set.name, Qt.ToolTipRole))
        # there were cases when the link was a non-empty string that was
        # not a valid link (e.g. "_"), hence the scheme check
        if gene_set.link and urlparse(gene_set.link).scheme:
            term_column.setData(gene_set.link, LinkRole)
            term_column.setForeground(QColor(Qt.blue))

        count_column = new_item((len(enrichment.query), Qt.DisplayRole), (set(enrichment.query), Qt.UserRole))

        # the genes are stored to put them on the output on selection
        genes_column = new_item((len(gene_set.genes), Qt.DisplayRole), (set(gene_set.genes), Qt.UserRole))

        ref_column = new_item((len(enrichment.reference), Qt.DisplayRole))
        pval_column = new_item((enrichment.p_value, Qt.DisplayRole), (enrichment.p_value, Qt.ToolTipRole))
        # no data is set on the FDR item here
        fdr_column = new_item()
        enrichment_column = new_item(
            (enrichment.enrichment_score, Qt.DisplayRole), (enrichment.enrichment_score, Qt.ToolTipRole)
        )

        rows.append(
            [
                count_column,
                ref_column,
                pval_column,
                fdr_column,
                enrichment_column,
                genes_column,
                category_column,
                term_column,
            ]
        )

    results.items = rows
    return results
Пример #29
0
def run_vizrank(compute_score: Callable, iterate_states: Callable,
                saved_state: Optional[Iterable], scores: List, progress: int,
                state_count: int, task: TaskState):
    """Score the states yielded by ``iterate_states`` and report them.

    Every state is passed to ``compute_score``. A score that is not
    ``None`` is inserted into a private copy of ``scores`` at the position
    found by ``bisect_left`` and queued on the result's queue together with
    that position, the state and the state that follows it.

    Args:
        compute_score: called with one state; returns a score or ``None``.
        iterate_states: called with ``saved_state``; must return an iterator
            (it is consumed with ``next``).
        saved_state: forwarded unchanged to ``iterate_states``.
        scores: scores collected so far; copied, the argument is not mutated.
        progress: number of states counted as already processed.
        state_count: total number of states, used to scale the progress.
        task: used for status/progress reporting, partial results and
            interruption checks.

    Returns:
        The ``Result`` carrying the queue of new scores and the latest copy
        of the score list. When interruption is requested the result is
        returned as it stands at that moment.
    """
    task.set_status("Getting combinations...")
    task.set_progress_value(0.1)
    states = iterate_states(saved_state)

    task.set_status("Getting scores...")
    res = Result(queue=Queue(), scores=None)
    scores = scores.copy()
    can_set_partial_result = True

    def do_work(st, next_st):
        try:
            score = compute_score(st)
            if score is not None:
                pos = bisect_left(scores, score)
                res.queue.put_nowait(
                    QueuedScore(position=pos,
                                score=score,
                                state=st,
                                next_state=next_st))
                scores.insert(pos, score)
        except Exception:  # ignore current state in case of any problem
            pass
        # publish a snapshot, the working list keeps changing
        res.scores = scores.copy()

    def reset_flag():
        nonlocal can_set_partial_result
        can_set_partial_result = True

    # An empty iterator used to leak StopIteration out of this function
    # (the first ``next`` was outside the ``try`` below). There is nothing
    # to score in that case, so hand back an empty result instead.
    exhausted = object()
    next_state = next(states, exhausted)
    if next_state is exhausted:
        res.scores = scores.copy()
        task.set_partial_result(res)
        return res

    state = None
    try:
        while True:
            if task.is_interruption_requested():
                return res
            task.set_progress_value(int(progress * 100 / max(1, state_count)))
            progress += 1
            # look one state ahead so that each queued score knows its
            # successor; StopIteration here means ``state`` is the last one
            state = copy.copy(next_state)
            next_state = copy.copy(next(states))
            do_work(state, next_state)
            # for simple scores (e.g. correlations widget) and many feature
            # combinations, the 'partial_result_ready' signal (emitted by
            # invoking 'task.set_partial_result') was emitted too frequently
            # for a longer period of time and therefore causing the widget
            # being unresponsive
            if can_set_partial_result:
                task.set_partial_result(res)
                can_set_partial_result = False
                # re-enable publishing after 10 ms
                Timer(0.01, reset_flag).start()
    except StopIteration:
        # the last state has no successor
        do_work(state, None)
        task.set_partial_result(res)
    return res
    def runner(self, state: TaskState) -> Table:
        """Return the normalized expression table for the selected collection.

        When ``self.data_table`` is falsy, the table is built from scratch:
        expressions, sample metadata and (optionally) QC data are read from a
        ``resdk.tables.RNATables`` object, combined into a ``Table``, its
        attributes are matched to genes and the result is cached in
        ``self.data_table``. In either case the table is passed through
        ``self.normalize`` before it is returned.

        Args:
            state: used for status/progress reporting and interruption
                checks.

        Returns:
            The result of ``self.normalize`` applied to the (cached or newly
            built) table.

        Raises:
            Exception: raised by the progress callback when interruption is
                requested.
        """
        exp_type = self.data_output_options.expression_type[self.exp_type].type
        exp_source = self.data_output_options.expression_sources[
            self.exp_source]
        proc_slug = self.data_output_options.process[self.proc_slug].slug
        collection_id = self.selected_collection_id

        table = self.data_table
        progress_steps_download = iter(np.linspace(0, 50, 2))

        def callback(i: float, status=""):
            # ``i`` is scaled by 100 before it is reported
            state.set_progress_value(i * 100)
            if status:
                state.set_status(status)
            if state.is_interruption_requested():
                raise Exception

        # NOTE(review): this is a truthiness test, not ``is None`` --
        # presumably an empty cached table should be rebuilt too; confirm.
        if not table:
            collection = self.res.get_collection_by_id(collection_id)
            coll_table = resdk.tables.RNATables(
                collection,
                expression_source=exp_source,
                expression_process_slug=proc_slug,
                progress_callable=wrap_callback(callback, end=0.5),
            )
            species = coll_table._data[0].output['species']
            sample = coll_table._samples[0]

            state.set_status('Downloading ...')
            # presumably the table accessors below need an event loop in this
            # thread -- TODO confirm
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                df_exp = coll_table.exp if exp_type != 'rc' else coll_table.rc
                df_exp = df_exp.rename(index=coll_table.readable_index)
                df_metas = coll_table.meta
                df_metas = df_metas.rename(index=coll_table.readable_index)
                df_qc = None
                if self.append_qc_data:
                    # TODO: check if there is a way to detect if collection
                    #       table contains QC data
                    try:
                        df_qc = coll_table.qc
                        df_qc = df_qc.rename(index=coll_table.readable_index)
                    except ValueError:
                        pass
            finally:
                # close the loop even when the download fails or the progress
                # callback raises on interruption
                loop.close()

            state.set_status('To data table ...')

            # field names (the part after the first dot) that occur in more
            # than one metadata column
            duplicates = {
                item
                for item, count in Counter([
                    label.split('.')[1]
                    for label in df_metas.columns.to_list() if '.' in label
                ]).items() if count > 1
            }

            # what happens if there is more nested sections?
            section_name_to_label = {
                section['name']: section['label']
                for section in sample.descriptor_schema.schema
            }

            # map metadata column paths to labels; duplicated field names
            # are prefixed with the label of their section
            column_labels = {}
            for field_schema, fields, path in iterate_schema(
                    sample.descriptor, sample.descriptor_schema.schema,
                    path=''):
                path = path[1:]  # this is ugly, but cant go around it
                if path not in df_metas.columns:
                    continue
                label = field_schema['label']
                section_name, field_name = path.split('.')
                column_labels[path] = (
                    label if field_name not in duplicates else
                    f'{section_name_to_label[section_name]} - {label}')

            df_exp = df_exp.reset_index(drop=True)
            df_metas = df_metas.astype('object')
            df_metas = df_metas.fillna(np.nan)
            df_metas = df_metas.replace('nan', np.nan)
            df_metas = df_metas.rename(columns=column_labels)
            if df_qc is not None:
                df_metas = pd.merge(df_metas,
                                    df_qc,
                                    left_index=True,
                                    right_index=True)

            # all metadata columns end up as metas of the new table;
            # expression columns become its continuous attributes
            xym, domain_metas = vars_from_df(df_metas)
            x, _, m = xym
            x_metas = np.hstack((x, m))
            attrs = [ContinuousVariable(col) for col in df_exp.columns]
            metas = domain_metas.attributes + domain_metas.metas
            domain = Domain(attrs, metas=metas)
            table = Table(domain, df_exp.to_numpy(), metas=x_metas)
            # NOTE(review): the first value of linspace(0, 50, 2) is 0.0, so
            # this reports 0 here -- 50 looks like the intent; confirm.
            state.set_progress_value(next(progress_steps_download))

            state.set_status('Matching genes ...')
            progress_steps_gm = iter(
                np.linspace(50, 99, len(coll_table.gene_ids)))

            def gm_callback():
                state.set_progress_value(next(progress_steps_gm))

            tax_id = species_name_to_taxid(species)
            gm = GeneMatcher(tax_id, progress_callback=gm_callback)
            table = gm.match_table_attributes(table, rename=True)
            table.attributes[TableAnnotation.tax_id] = tax_id
            table.attributes[TableAnnotation.gene_as_attr_name] = True
            table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
            # cache the un-normalized table for later calls
            self.data_table = table

        state.set_status('Normalizing ...')
        table = self.normalize(table)
        state.set_progress_value(100)

        return table
Пример #31
0
    def compute_scores(
        data: Table,
        genes: Table,
        p_threshold: float,
        p_value_fun: str,
        scoring: str,
        start: float,
        end: float,
        result: Result,
        state: TaskState,
    ):
        """Fill ``result.scores`` and publish it as a ``("scores", result)`` partial result.

        When ``data`` or ``genes`` is falsy, all four score fields are reset
        to ``None``. Otherwise ``z_vals`` and then ``annotations``/``p_vals``
        are computed unless ``result.scores`` already holds truthy values for
        them, and ``table`` is always recomputed with ``p_threshold``.

        Args:
            data: input passed to ``AnnotateSamplesMeta.mann_whitney_test``
                and ``assign_annotations``.
            genes: marker genes passed to ``assign_annotations``.
            p_threshold: forwarded to ``filter_annotations``.
            p_value_fun: forwarded to ``assign_annotations``.
            scoring: forwarded to ``assign_annotations``.
            start: progress value at which this step begins.
            end: progress value at which this step ends.
            result: object whose ``scores`` attribute is updated in place.
            state: used for status/progress reporting, partial results and
                interruption checks.

        Returns:
            ``None``. The function returns early, without the final publish,
            when interruption is requested between stages.
        """
        if not data or not genes:
            result.scores.z_vals = None
            result.scores.annotations = None
            result.scores.p_vals = None
            result.scores.table = None
        else:
            state.set_status("Computing scores...")
            # relative cost of the three stages, scaled to the progress
            # window [start, end] that belongs to this step
            weights = np.array([15, 75, 10]) * (end - start) / 100
            # Progress after each stage. The raw weights were reported
            # before, which is neither cumulative nor offset by ``start``.
            checkpoints = start + np.cumsum(weights)

            if not result.scores.z_vals:
                result.scores.z_vals = AnnotateSamplesMeta.mann_whitney_test(data)
                state.set_partial_result(("scores", result))
            state.set_progress_value(checkpoints[0])
            if state.is_interruption_requested():
                return

            if not result.scores.annotations or not result.scores.p_vals:
                annot, p_vals = AnnotateSamplesMeta.assign_annotations(
                    result.scores.z_vals, genes, data, p_value_fun=p_value_fun, scoring=scoring
                )
                result.scores.annotations = annot
                result.scores.p_vals = p_vals
                state.set_partial_result(("scores", result))
            state.set_progress_value(checkpoints[1])
            if state.is_interruption_requested():
                return

            # filtering depends on ``p_threshold`` and is always redone
            result.scores.table = AnnotateSamplesMeta.filter_annotations(
                result.scores.annotations, result.scores.p_vals, p_threshold=p_threshold
            )

        state.set_partial_result(("scores", result))
Пример #32
0
def runner(
    res: ResolweAPI,
    data_objects: List[Data],
    options: DataOutputOptions,
    exp_type: int,
    proc_type: int,
    input_annotation: int,
    state: TaskState,
) -> Table:
    """Download the selected expressions and assemble them into one table.

    ``exp_type``, ``proc_type`` and ``input_annotation`` are indices into the
    corresponding lists of ``options``. Data objects that do not match the
    selected process type, annotation (source, species, build) and, unless
    the expression type is ``'rc'``, expression type are dropped. One row is
    produced per remaining data object; sample descriptor values, the sample
    name and the upper-cased expression type are stored as string metas.
    Finally the table attributes are matched to genes.

    Raises:
        ResolweDataObjectsNotFound: when no data object passes the filters.
    """
    frames = []
    metadata = defaultdict(list)

    # resolve the selected indices to the actual option values
    exp_type = options.expression[exp_type].type
    proc_type = options.process[proc_type].type
    annotation = options.input_annotation[input_annotation]
    source = annotation.source
    species = annotation.species
    build = annotation.build

    # read counts are stored under their own output field, everything
    # else under 'exp'
    check_exp_type = exp_type != 'rc'
    file_output_field = 'exp' if check_exp_type else exp_type

    def is_selected(obj) -> bool:
        # apply filters
        if not obj.process.type == proc_type:
            return False
        if not (
            obj.output['source'] == source
            and obj.output['species'] == species
            and obj.output['build'] == build
        ):
            return False
        if check_exp_type:
            return obj.output['exp_type'] == exp_type
        return True

    data_objects = [obj for obj in data_objects if is_selected(obj)]
    if not data_objects:
        raise ResolweDataObjectsNotFound

    # one progress step per download plus three for the steps that follow
    n_objects = len(data_objects)
    steps = n_objects + 3

    def report(step):
        state.set_progress_value(100 * (step / steps))

    state.set_status('Downloading ...')
    for step, data_object in enumerate(data_objects, start=1):
        report(step)

        # collect metadata of the sample this data object belongs to
        sample = data_object.sample
        general = sample.descriptor.get('general', {})
        for label in SAMPLE_DESCRIPTOR_LABELS:
            metadata[label].append([general.get(label, '')])
        metadata['sample_name'].append([sample.name])
        metadata['expression_type'].append([exp_type.upper()])

        response = res.get_expressions(data_object.id, data_object.output[file_output_field]['file'])
        # expressions to data frame
        with io.BytesIO(response.content) as buffer:
            expressions = pd.read_csv(buffer, sep='\t', compression='gzip')
        # genes become columns, the sample becomes a single row
        frames.append(expressions.set_index('Gene').T.reset_index(drop=True))

    state.set_status('Concatenating samples ...')
    combined = pd.concat(frames, axis=0)

    state.set_status('To data table ...')
    table = table_from_frame(combined)
    report(n_objects + 1)

    state.set_status('Adding metadata ...')
    meta_vars = [StringVariable(label) for label in metadata]
    table = table.transform(
        Domain(table.domain.attributes, table.domain.class_vars, meta_vars)
    )
    for key, column in metadata.items():
        table[:, key] = column
    report(n_objects + 2)

    state.set_status('Matching genes ...')
    tax_id = species_name_to_taxid(species)
    matcher = GeneMatcher(tax_id)
    table = matcher.match_table_attributes(table, rename=True)
    table.attributes[TableAnnotation.tax_id] = tax_id
    table.attributes[TableAnnotation.gene_as_attr_name] = True
    table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
    report(n_objects + 3)

    return table
Пример #33
0
 def worker(self, state: TaskState):
     """Publish ``self.update_frame()`` repeatedly until interrupted.

     The value returned by ``self.update_frame()`` is passed to
     ``state.set_partial_result`` and the loop then sleeps for 0.1 s.
     The loop ends, and the method returns ``None``, once
     ``state.is_interruption_requested()`` is true; before this check was
     added the loop could never be stopped cooperatively.
     """
     while not state.is_interruption_requested():
         state.set_partial_result(self.update_frame())
         time.sleep(1 / 10)