Пример #1
0
    def get_help(self, widget, remote=False, try_path=False):
        """Fill the help and command fields for the currently selected MSA tool.

        Does nothing while ``data.genes`` is empty.

        :param widget: the widget that emitted the signal; only queried via
            ``get_active_text()`` when ``try_path`` is set (non-remote case)
        :param remote: use the remote client script for the selected algorithm
            instead of a local executable
        :param try_path: take the executable from ``widget`` rather than from
            the ``shutil.which`` lookup (only consulted when ``remote`` is false)
        :return: None
        """
        data, iface = self.data, self.iface

        if not data.genes:
            return

        if remote:
            data.msa.algo = repo.toalgo(iface.remote_algo.get_active_text())
            algo = data.msa.algo
            client = repo.TOOLS / 'MSA_clients' / (algo + '.py')
            launcher = '{} {} '.format(sys.executable, client)
            self.set_helpers(launcher, iface.remote_help, data.msa.remote_cmd,
                             algo, True, iface.remote_cmd)
            return

        data.msa.algo = repo.toalgo(iface.msa_algo.get_active_text())
        on_path = shutil.which(data.msa.algo)
        exe = widget.get_active_text() if try_path else on_path

        if not exe:
            # no executable found; unselect in path box
            iface.msa_exe.unselect_all()
            txt = data.msa.algo + (' was not found on your system $PATH. '
                                   'You can try manually specifying the path '
                                   'to the executable.')
            # show the hint in the help field and offer no command suggestion
            iface.msa_help.get_buffer().props.text = txt
            iface.msa_cmd.get_buffer().props.text = ''
            return

        # get the --help output and save it in the lookup field on the right
        iface.msa_exe.set_filename(exe)
        self.set_helpers('{} --help; exit 0'.format(exe), iface.msa_help,
                         data.msa.cmd, data.msa.algo, False, iface.msa_cmd)
Пример #2
0
    def __init__(self):
        """Set up the MSA page: combo box columns, signal handlers and buttons."""
        super().__init__()
        data, iface = self.data, self.iface

        iface.msa_algo.set_entry_text_column(0)
        for combo in (iface.msa_algo, iface.remote_algo):
            combo.set_id_column(0)

        def keep_local_cmd(text_view, *args):
            # remember the edited command under the currently selected algorithm
            data.msa.cmd.update({
                repo.toalgo(iface.msa_algo.get_active_text()):
                text_view.get_buffer().props.text.strip()
            })

        def keep_remote_cmd(text_view, *args):
            # same as above, for the remote command field
            data.msa.remote_cmd.update({
                repo.toalgo(iface.remote_algo.get_active_text()):
                text_view.get_buffer().props.text.strip()
            })

        iface.msa_cmd.connect('focus_out_event', keep_local_cmd)
        iface.remote_cmd.connect('focus_out_event', keep_remote_cmd)

        # refresh the help text when an algorithm or executable is picked
        iface.msa_algo.connect('changed', self.get_help)
        iface.remote_algo.connect('changed', self.get_help, True)
        iface.msa_import.connect('file-set', self.load_msa)
        # NOTE(review): this passes remote=True together with try_path=True,
        # but get_help only reads try_path in its non-remote branch - confirm
        # that this is intended.
        iface.msa_exe.connect('file-set', self.get_help, True, True)

        # connect buttons; both build buttons are bound to 'Return'
        iface.msa_build.connect('clicked', self.start_align)
        self.bind_accelerator(self.accelerators, iface.msa_build, 'Return')
        iface.remote_build.connect('clicked', self.start_align, True)
        self.bind_accelerator(self.accelerators, iface.remote_build, 'Return')

        data.msa.stack_child_name = iface.align_stack.get_visible_child_name()
Пример #3
0
    def reload_ui_state(self):
        """Push the values stored in ``self.data.msa`` back into the widgets.

        Restores the visible stack page, both algorithm selections, the
        file chooser paths (only when a path was stored) and the command
        text stored for each selected algorithm ('' when none is stored).
        """
        ns, iface = self.data.msa, self.iface

        iface.align_stack.set_visible_child_name(ns.stack_child_name)
        iface.msa_algo.set_active_id(ns.msa_algo_id)

        exe_path = ns.msa_exe_filename
        if exe_path:
            iface.msa_exe.set_filename(exe_path)

        iface.remote_algo.set_active_id(ns.remote_algo_id)

        import_path = ns.msa_import_filename
        if import_path:
            iface.msa_import.set_filename(import_path)

        # command fields: look up the stored command for each algorithm
        local_cmd = ns.cmd.get(repo.toalgo(ns.msa_algo_id), '')
        iface.msa_cmd.get_buffer().props.text = local_cmd

        remote_cmd = ns.remote_cmd.get(repo.toalgo(ns.remote_algo_id), '')
        iface.remote_cmd.get_buffer().props.text = remote_cmd
Пример #4
0
 def load_msa(self, widget):
     """Import an existing MSA file chosen in ``widget`` into the project.

     The file is copied to ``self.wd / repo.PATHS.import_msa``, hashed, and
     registered as the single pseudo-gene ``'import'``. A ``metadata.tsv``
     listing all record IDs of the imported FASTA is written as well.

     If the copy fails for any reason other than source and target being
     the same file, the error is shown and logged and the method returns
     without touching the dataset, so that a missing or stale file is not
     registered as a successful import.

     :param widget: file chooser that emitted the signal; its
         ``get_filename()`` result is the file to import
     :return: None
     """
     data = self.data
     iface = self.iface
     try:
         target = self.wd / repo.PATHS.import_msa
         target.parent.mkdir(exist_ok=True)
         shutil.copy(widget.get_filename(), target)
     except shutil.SameFileError:
         # the selected file already is the import target; nothing to copy
         pass
     except Exception as ex:
         self.show_notification(str(ex))
         LOG.error(ex)
         # abort: continuing would hash and parse a missing or outdated file
         return

     self.get_hashes(repo.PATHS.import_msa, PAGE)
     data.genes = ['import']
     data.gene_ids = {
         'import': {
             r.id
             for r in SeqIO.parse(self.wd / repo.PATHS.import_msa, 'fasta')
         }
     }
     # only the aligner object is kept; the suggested command is not needed
     iface.aligner, _ = self.get_msa_build_cmd(
         repo.toalgo(iface.msa_algo.get_active_text()), self.wd, data.genes)
     # write a metadata.tsv
     with open(self.wd / repo.PATHS.tsv, 'w') as metadata:
         metadata.write('id\tgene\n')
         for _id in data.gene_ids['import']:
             metadata.write('%s\timport\n' % _id)
     LOG.debug('using imported MSA')
     self.set_changed(PAGE, False)
     self.save_ui_state()
Пример #5
0
    def _do_gbl3(self, shared_ids, arg, gbar, barspace):
        """Trim the per-gene MSAs, concatenate them and plot the result.

        For every gene in ``data.genes`` the raw MSA is read, records whose ID
        is in ``data.gbl.ignore_ids`` are removed (and the raw MSA file is
        re-written without them), and - unless the selected preset is 'skip' -
        the shell command ``arg % raw_msa`` is run (Gblocks, going by the log
        messages). The column indices marked ``#`` in the resulting mask file
        are collected in ``blocks``. Afterwards the MSAs are concatenated via
        ``iface.aligner.concat_msa``, hashed, and - again unless 'skip' is
        selected - two figures are saved as PNG and placed into the left/right
        viewports: all columns with the non-selected ones drawn at
        ``repo.ALPHA`` opacity, and only the selected columns.

        :param shared_ids: sample IDs expected in every MSA; compared to
            ``sorted(records.keys())``, so presumably a sorted list - TODO confirm
        :param arg: shell command template with one ``%s`` placeholder that is
            filled with the path of the raw MSA
        :param gbar: if truthy, draw a gene marker bar under the MSA and save
            its legend; otherwise only x-axis ticks are added
        :param barspace: space reserved below the MSA plot, expressed relative
            to ``data.gbl_shape[1]``
        :return: always True; errors are handed to ``self.stop_gbl`` through
            ``GObject.idle_add``
        """
        data = self.data
        iface = self.iface

        errors = list()
        msa_lens = list()
        blocks = list()
        # one row per shared sample; columns are appended gene by gene
        array = np.empty(shape=(len(shared_ids), 0), dtype=int)

        for gene in data.genes:
            iface.text = '%s: read MSA' % gene
            LOG.debug(iface.text)
            raw_msa = (self.wd / gene / ('%s_raw_msa.fasta' % gene)).resolve()
            msa = self.wd / gene / ('%s_msa.fasta' % gene)
            records = {r.id: r for r in SeqIO.parse(raw_msa, 'fasta')}
            # IDs present in this MSA that are flagged to be ignored
            take_out = {
                _id
                for _id in records.keys() if _id in data.gbl.ignore_ids
            }
            # remove them from records, keeping the removed records by ID
            take_out = {_id: records.pop(_id) for _id in take_out}
            if take_out:
                # write newly dropped sequences to backup file
                new_take_out = {
                    _id: r
                    for _id, r in take_out.items()
                    if _id not in iface.tempspace.bak_ignore
                }
                with open(self.wd / gene / ('%s_raw_msa_dropped.fasta' % gene),
                          'a') as fasta:
                    SeqIO.write(new_take_out.values(), fasta, 'fasta')
                # overwrite MSA without all dropped samples
                with open(raw_msa, 'w') as fasta:
                    SeqIO.write(records.values(), fasta, 'fasta')
            # abort the whole run if the remaining IDs differ from shared_ids
            if sorted(records.keys()) != shared_ids:
                errors.append(
                    'MSA for %s does not match the dataset, please re-build.' %
                    gene)
                sleep(.1)
                GObject.idle_add(self.stop_gbl, errors)
                return True
            # integer-encoded MSA with rows in shared_ids order
            ar = np.array([
                repo.seqtoint(records[_id].seq.upper()) for _id in shared_ids
            ])
            msa_lens.append(ar.shape[1])
            # get the array columns that are not only gaps
            usable_sites = [
                i for i in range(ar.shape[1])
                if set(ar[:, i]) != {repo.toint('-')}
            ]
            array = np.hstack((
                array,
                ar,
            ))  # shared.SEP would need to be stacked here
            del ar
            # first two entries become (number of columns, number of rows)
            data.msa_shape[:2] = array.shape[::-1]

            if iface.gbl_preset.get_active_text() == 'skip':
                # no trimming: the raw MSA is used as-is
                LOG.debug('skipping %s' % gene)
                shutil.copy(raw_msa, msa)
                iface.i += 1
                continue

            iface.text = '%s: run Gblocks' % gene
            LOG.debug(iface.text)
            with open(self.wd / gene / 'gblocks.log', 'w') as log_handle:
                try:
                    LOG.debug(arg % raw_msa)
                    subprocess.run(arg % raw_msa,
                                   shell=True,
                                   check=True,
                                   stdout=log_handle,
                                   stderr=log_handle)
                except (OSError, subprocess.CalledProcessError) as e:
                    # record the failure and move on to the next gene;
                    # iface.i is not incremented on this path
                    errors.append(str(e))
                    log_handle.write(str(e))
                    continue

            # parse result
            iface.text = '%s: parse result' % gene
            LOG.debug(iface.text)

            shutil.move(raw_msa.with_suffix('.fasta.txt'), msa)
            # get the good blocks from the last pseudo-sequence in the text mask file
            # NOTE(review): mask stays unbound (or keeps the previous gene's
            # value) if the mask file contains no records - confirm this
            # cannot happen.
            for pseudo_seq in SeqIO.parse(
                    raw_msa.with_suffix('.fasta.txtMask'), 'fasta'):
                mask = pseudo_seq
            # map the Gblocks mask to the original MSA sites
            line_blocks = [
                usable_sites[i] for i, char in enumerate(mask.seq)
                if char == '#'
            ]
            LOG.debug(line_blocks)
            if not line_blocks:
                err = '%s: no good blocks' % gene
                LOG.error(err)
                errors.append(err)
                continue

            # offset the per-gene column indices by the length of all
            # previously read MSAs
            shift = sum(msa_lens[:-1])
            blocks.extend([i + shift for i in line_blocks])
            iface.i += 1
        data.msa_lens = msa_lens

        iface.text = 'concatenating MSAs'
        # remember which IDs were ignored in this run, so the next run only
        # backs up newly dropped sequences
        iface.tempspace.bak_ignore = {i for i in data.gbl.ignore_ids}
        # create an aligner object if iface does not hold one yet
        if 'aligner' not in iface:
            iface.aligner, cmd = self.get_msa_build_cmd(
                repo.toalgo(iface.msa_algo.get_active_text()), self.wd,
                data.genes)
        iface.aligner.reset_paths(self.wd, self.wd / repo.PATHS.msa)
        data.msa_shape[2], data.msa_shape[3] = iface.aligner.concat_msa(
            gui=shared_ids)
        iface.text = 'computing SHA256 hash'
        LOG.debug(iface.text)
        self.get_hashes(repo.PATHS.msa, PAGE)

        iface.i += 1
        iface.text = 'plot MSAs'
        LOG.debug(iface.text)

        data.gbl_shape[0] = array.shape[1] * self.get_hadj()
        # make gaps transparent
        array = np.ma.masked_where(array > repo.toint('else'), array)

        # create a transparency mask
        gbl_mask = np.full(array.shape, repo.ALPHA)
        gbl_mask[:, blocks] = 1

        # this overwrites the value returned by concat_msa above
        data.msa_shape[2] = len(blocks)  # for completeness
        LOG.debug('msa shape: %s' % str(data.msa_shape))
        x_ratio = data.msa_shape[2] / data.msa_shape[0]
        LOG.debug('x ratio: %.3f' % x_ratio)

        # adjust maximum size
        # NOTE(review): if the larger dimension itself exceeds 2**14 this loop
        # ends with scale == 0, which is then used for the savefig dpi below.
        scale = 6
        while max(data.msa_shape[:2]) * scale > 2**14:
            scale -= 1
        LOG.debug('scaling gbl with %d' % scale)

        if iface.gbl_preset.get_active_text() != 'skip':
            # first pass: left plot with all columns and the transparency mask;
            # second pass: right plot with only the selected columns.
            # The loop rebinds blocks and x_ratio; the zip arguments are
            # evaluated before the first iteration, so they still use the
            # values computed above.
            for alpha, blocks, gtk_bin, png_path, x_ratio, width \
                    in zip([gbl_mask, 1], [range(array.shape[1]), blocks],
                           [iface.gbl_left_vp, iface.gbl_right_vp],
                           [repo.PATHS.left, repo.PATHS.right], [1, x_ratio],
                           [data.msa_shape[0], data.msa_shape[2]]):
                f = Figure(
                )  # figsize=(width / shared.DPI, data.msa_shape[1] / shared.DPI), dpi=shared.DPI)  # figaspect(data.msa_shape[1] / width))
                f.set_facecolor('none')
                f.set_figheight(data.msa_shape[1] / repo.DPI * 5)
                f.set_figwidth(max(1, width) / repo.DPI)
                # f.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)

                # leave room at the bottom for the gene legend bar
                b = barspace / data.gbl_shape[1]
                ax = f.add_axes([0, b, 1, 1 - b])

                mat = ax.matshow(array[:, blocks],
                                 alpha=alpha,
                                 cmap=ListedColormap(repo.colors),
                                 vmin=-.5,
                                 vmax=len(repo.colors) - .5,
                                 aspect='auto')
                if not gbar:
                    LOG.debug('adding ticks')
                    [
                        ax.spines[t].set_visible(False)
                        for t in ['left', 'right', 'top', 'bottom']
                    ]
                    ax.yaxis.set_visible(False)
                    ax.xaxis.set_ticks_position('bottom')
                    ax.tick_params(colors=iface.FG,
                                   pad=.2,
                                   length=0,
                                   labelsize=1)
                    # a tick at every multiple of 100
                    ax.xaxis.set_ticks([
                        i for i in range(1, array[:, blocks].shape[1] - 1)
                        if not i % 100
                    ])
                else:
                    ax.axis('off')
                    LOG.debug('adding gene marker bar')
                    # build matrix of gene indicators
                    gm = [[i] * l for i, l in enumerate(msa_lens)]
                    if x_ratio == 1:
                        # add the spacer
                        for i in range(len(data.genes) - 1):
                            gm[i] += [-1] * len(repo.SEP)
                    # flatten the per-gene lists into one row
                    gm = [gi for gl in gm for gi in gl]
                    # turn into np array
                    gm = np.vstack([np.array(gm)] * 2)
                    # make spacer transparent
                    gm = np.ma.masked_where(gm < 0, gm)
                    # trim the array early
                    gm = gm[:, blocks]
                    gene_colors = get_cmap('GnBu', len(data.genes))

                    # plot the marker bar onto the MSA graphic
                    bax = f.add_axes([0, b * .4, 1, b / 3])
                    bar = bax.pcolormesh(gm, cmap=gene_colors)
                    # bax.axis('off')
                    [
                        bax.spines[t].set_visible(False)
                        for t in ['left', 'right', 'top', 'bottom']
                    ]
                    bax.yaxis.set_visible(False)
                    bax.xaxis.set_ticks_position('bottom')
                    bax.tick_params(colors=iface.FG,
                                    pad=.2,
                                    length=0,
                                    labelsize=1)
                    bax.xaxis.set_ticks(
                        [i for i in range(1, gm.shape[1] - 1) if not i % 100])

                    # plot a legend for the gene marker bar
                    with plt.rc_context({
                            'axes.edgecolor': iface.FG,
                            'xtick.color': iface.FG
                    }):
                        iface.text = 'gene marker bar'
                        LOG.debug(iface.text)
                        Path.mkdir(self.wd / repo.PATHS.phylo_msa.parent,
                                   exist_ok=True)
                        fig = plt.figure(figsize=(4, .2))
                        cax = fig.add_subplot(111)
                        # one tick per gene, centered in its color segment
                        cbar = ColorbarBase(ax=cax,
                                            cmap=gene_colors,
                                            orientation='horizontal',
                                            ticks=[
                                                (.5 / len(data.genes) +
                                                 j * 1 / len(data.genes))
                                                for j in range(len(data.genes))
                                            ])
                        cbar.ax.set_xticklabels(data.genes)
                        fig.savefig(self.wd / repo.PATHS.gbar,
                                    transparent=True,
                                    bbox_inches='tight',
                                    pad_inches=0,
                                    dpi=600)
                        plt.close(fig)
                        del fig, cbar

                iface.i += 1
                iface.text = 'save PNG'
                LOG.debug(iface.text)
                Path.mkdir(self.wd / png_path.parent, exist_ok=True)
                f.savefig(self.wd / png_path,
                          transparent=True,
                          dpi=scale * repo.DPI,
                          bbox_inches='tight',
                          pad_inches=0.00001)

                if iface.rasterize.props.active:
                    iface.text = 'place PNG'
                    LOG.debug(iface.text)
                    # NOTE(review): data.gbl_shape[0] was already multiplied by
                    # get_hadj() above; confirm the second factor is intended.
                    self.load_image(
                        iface.zoomer, PAGE, gtk_bin, self.wd / png_path,
                        data.gbl_shape[0] * x_ratio * self.get_hadj(),
                        data.gbl_shape[1])
                else:
                    iface.text = 'place vector'
                    LOG.debug(iface.text)
                    canvas = FigureCanvas(f)
                    canvas.set_size_request(
                        max(len(blocks) * self.get_hadj(), -1),
                        data.gbl_shape[1])  # width, height
                    try:
                        # replace whatever the container currently shows
                        ch = gtk_bin.get_child()
                        if ch:
                            gtk_bin.remove(ch)
                        gtk_bin.add(canvas)
                    except Exception as ex:
                        LOG.error(ex)
                iface.i += 1

                gtk_bin.realize()
                gtk_bin.show_all()

        # re-size
        LOG.debug('re-sizing again')
        for wi in [iface.gbl_left, iface.gbl_right]:
            wi.set_max_content_height(data.gbl_shape[1])

        self.load_colorbar(iface.palplot2)

        # show the gene bar legend only if a gene marker bar was requested
        if gbar:
            self.load_colorbar(iface.gbar1, gbar=True)
            iface.gbar1.set_visible(True)
        else:
            iface.gbar1.set_visible(False)

        iface.text = 'idle'
        iface.frac = 1
        sleep(.1)
        # hand the collected errors to stop_gbl via GObject.idle_add
        GObject.idle_add(self.stop_gbl, errors)
        return True