Exemplo n.º 1
0
    if reload is not None:
        case.data['metadata']['funfoldes'] = reload
        return case

    # We need to check that the rosetta_scripts executable is available
    if not data['cmd']['folding'][0].is_file() or not os.access(
            str(data['cmd']['folding'][0]), os.X_OK):
        raise IOError('Cannot find executable {}'.format(
            data['cmd']['folding'][0]))

    # Build the structure
    utils.build_template_sketch(case, wpaths['pdb'])

    # Make the Folding and Design RScripts
    data = utils.make_scripts(case, wpaths, data, natbias, layer_design)

    # Finish command
    data = utils.commands(case, nstruct, data, wpaths)

    # Execute
    data = utils.execute(data, wpaths)

    # Update metadata
    data = utils.update_data(data, wpaths)

    # Checkpoint save
    TButil.checkpoint_out(wpaths['checkpoint'], data)
    case.data['metadata']['funfoldes'] = data

    return case
Exemplo n.º 2
0
    def single_execute(self, data: Dict) -> Dict:
        """Search loop fragments with MASTER for every SSE group of a single-connectivity Case.

        A :class:`Case` is built from ``data``. For each entry of ``self.steps``
        a ``loopgroupNN`` working folder is created, existing per-loop
        checkpoints are reloaded, a PDB query is built for the SSEs of the
        group and, when no ``.master`` result file exists yet, MASTER is run
        and the resulting loops are turned into fragment files. Loop data is
        appended to ``metadata.loop_fragments`` and ``metadata.loop_lengths``
        of the Case.

        :param data: Content from which the :class:`Case` is created.

        :return: The processed case.
            NOTE(review): the annotation says ``Dict`` but the object returned
            is the ``Case`` built from ``data`` -- confirm what callers expect.

        :raises NodeDataError: If the Case does not contain exactly one
            connectivity.
        """
        kase = Case(data)
        # Loop MASTER is only applied to a Case with one single connectivity
        if kase.connectivity_count != 1:
            err = f'{self.nodeID} can only be applied to one connectivity. '
            err += f'Current case contains a total of {kase.connectivity_count}.'
            raise NodeDataError(err)
        # And has to be reoriented
        if not kase.is_reoriented:
            self.log.debug(
                'Topology was provided without oriented SSE -> orienting.')
            kase = kase.apply_topologies()[0]

        # Generate the folder tree for a single connectivity.
        folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
        folders.mkdir(parents=True, exist_ok=True)

        # Global step distance
        # NOTE(review): loop_step is only referenced by commented-out code
        # below; it is currently unused in this method.
        loop_step = kase.cast_absolute(
        )['configuration.defaults.distance.loop_step']

        # Output keys: make sure both result lists exist before appending.
        kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
        kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

        # Find steps: Each pair of secondary structure.
        #it = kase.connectivities_str[0].split('.')
        #steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
        lengths = kase.connectivity_len[0]
        # Running position used as the 'ini' edge of the next loop; it is
        # advanced by (sse1 + loop) after every reloaded or computed loop.
        start = 1

        for i, (group, infos) in enumerate(self.steps.items()):
            self.log.info(f'Search at: {group}')

            # 1. Make folders and files
            wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
            wfolder.mkdir(parents=True, exist_ok=True)
            outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
            outfilePDS = wfolder.joinpath(
                f'loopgroup_master.iter{i + 1:02d}.pds')
            masfile = outfile.with_suffix('.master')

            # The last element of the group's entry is split on ';'; items
            # equal to 'x' are dropped and the rest are parsed as the integer
            # loop numbers used to name the loopNN checkpoint folders.
            # NOTE(review): the entry is looked up through a rebuilt
            # 'groupNN' key instead of the iterated `infos` -- presumably
            # equivalent; confirm against how self.steps is populated.
            gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
            gr = [int(g) for g in gr if g != 'x']
            for g in gr:
                checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')
                # 2. Check if checkpoint exists, retrieve and skip
                reload = TButil.checkpoint_in(self.log, checkpoint)
                if reload is not None:
                    self.log.debug(
                        f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                    kase.data['metadata']['loop_fragments'].append(reload)
                    kase.data['metadata']['loop_lengths'].append(
                        int(reload['edges']['loop']))
                    start += (int(reload['edges']['sse1']) +
                              int(reload['edges']['loop']))
                    # NOTE(review): this only moves on to the next loop of
                    # the inner loop; the group is still processed below and
                    # the MASTER search is skipped only if `masfile` exists.
                    continue

            # 3. Check hairpin
            # Get SSEs and identifiers
            sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
            #sse1_name, sse2_name = sse1['id'], sse2['id']
            #is_hairpin = self.check_hairpin(sse1_name, sse2_name)

            # 4. Generate structures
            sses = TBstructure.build_pdb_object(self.log,
                                                sses,
                                                5,
                                                concat=False,
                                                outfile=outfile)

            # Only search when no MASTER result file is present for the group.
            if not masfile.is_file():
                # 5. calculate expected loop length by loop_step
                #Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

                # 6. Run MASTER
                #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                self.log.debug(f'FILE {outfilePDS}')
                # -> make PDS query
                cmd = TBMaster.createPDS(outfile, outfilePDS)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                run(cmd, stdout=DEVNULL)
                # -> run MASTER
                cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb,
                                                 masfile, infos[1],
                                                 self.rmsd_cut)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                # NOTE(review): `result` is only used by the commented-out
                # branch below; neither external command's return code is
                # checked at the moment.
                result = run(cmd, stdout=DEVNULL)

                # TODO: implement motif compatibility
                # if result.returncode: # no loop between that connection, e.g. a motif ranging over multiple sse with keeping the loops
                #     # 4. Generate structures
                #     self.log.debug('generate combined structure')
                #     sse = pd.concat([sse1, sse2], sort=False)
                #
                #     # 6. Run MASTER
                #     self.log.debug(Path(outfile))
                #     #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                #     self.log.debug(f'FILE {outfilePDS}')
                #     # -> make PDS query
                #     cmd = TBMaster.createPDS(outfile, outfilePDS)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #     # -> run MASTER
                #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #
                #     # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                #     match_count = self.minimize_master_file(masfile)
                #     self.log.debug(f'match count here {match_count}')
                #
                #     # 7. Retrieve MASTER data
                #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
                #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
                #     total_len = sse1l + loopl + sse2l
                #     end_edge = total_len + start - 1
                #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
                #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
                #     self.log.debug(dfloop.to_string())
                #
                #     # 8. Bring and Combine fragments from the different sources.
                #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
                #     loop_data['match_count'] += match_count

                #else:

                # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                match_count = self.minimize_master_file(masfile)
                # 7. Retrieve MASTER data
                df_container = self.process_master_data(
                    masfile, infos[0], infos[1], infos[2])

                # One pass per distinct value of the `order` column.
                for indx in list(df_container.order.drop_duplicates()):
                    dfloop = df_container[df_container.order == indx]
                    # NOTE(review): SSE lengths are indexed with the group
                    # counter `i`, not with a per-loop index, so every loop
                    # of the same group uses the same lengths -- confirm
                    # this is intended for groups with more than one loop.
                    sse1l, loopl, sse2l = lengths[i], int(
                        dfloop['loop_length'].values[0]), lengths[i + 1]
                    total_len = sse1l + loopl + sse2l
                    end_edge = total_len + start - 1
                    edges = {
                        'ini': int(start),
                        'end': int(end_edge),
                        'sse1': int(sse1l),
                        'loop': int(loopl),
                        'sse2': int(sse2l)
                    }
                    self.log.debug(
                        f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}'
                    )
                    self.log.debug(dfloop.to_string())

                    # 8. Bring and Combine fragments from the different sources.
                    loop_data, nfolder = self.make_fragment_files(dfloop,
                                                                  edges,
                                                                  masfile,
                                                                  no_loop=True)
                    loop_data['match_count'] += match_count

                    # 9. Save data in the Case
                    kase.data['metadata']['loop_fragments'].append(loop_data)
                    kase.data['metadata']['loop_lengths'].append(int(loopl))
                    # Advance past the first SSE and the loop; the second SSE
                    # is not added to the offset here.
                    start += (sse1l + loopl)

                    # 10. Checkpoint save
                    checkpoint = nfolder.joinpath('checkpoint.json')
                    TButil.checkpoint_out(self.log, checkpoint, loop_data)
        return kase
Exemplo n.º 3
0
        rules = list(zip([sse['id'] for sse in CKase.ordered_structures],
                         list(zip(cstrs, cends)),
                         list(next(flip) for _ in range(len(CKase.ordered_structures)))))
        extras = TButil.pdb_geometry_from_rules(query, rules)

        # MASTER search
        createpds = TButil.createPDS(query)
        TButil.plugin_bash(createpds)
        run(createpds, stdout=DEVNULL)
        masters = TButil.master_best_each(query.with_suffix('.pds'), stepfolder.joinpath('_master'), rmsd)
        data = submit_searches(masters, stepfolder, current_case_file, '.'.join([x['id'] for x in sses]))
        data = calc_corrections(data, kase, set(data['layers']), done_l, extras, bin)

        kase.data['metadata']['imaster'].setdefault('step{:02d}'.format(i + 1), data)
        TButil.checkpoint_out(checkpoint, data)
        kase.data['metadata']['corrections'].append(data['corrections'])
        done_l.update(data['layers'])
        corrections.update(data['corrections'])

    return kase


def submit_searches( cmd: List[str], wdir: Path, current_case_file: Path, current_sse: str ) -> Dict:
    """
    """
    unimaster = wdir.joinpath('match.master')
    imaster = Path(__file__).parent.joinpath('imaster.py')
    unidata = wdir.joinpath('geometry.csv')
    if unimaster.is_file() and unidata.is_file():
        return {'matches': unimaster, 'stats': unidata, 'corrections': None,
Exemplo n.º 4
0
        if TBcore.get_option('system', 'debug'):
            sys.stdout.write(
                '\nINI: {}; END: {}; SSE1: {}; LOOP: {}; SSE2: {}\n\n'.format(
                    start, end_edge, sse1l, loopl, sse2l))
            sys.stdout.write(dfloop.to_string() + '\n')

        # 8. Make Fragments
        loop_data = make_fragment_files(dfloop, edges, masfile)
        loop_data['match_count'] += match_count
        case.data['metadata']['loop_fragments'].append(loop_data)
        case.data['metadata']['loop_lengths'].append(int(loopl))

        start += (sse1l + loopl)

        # Checkpoint save
        TButil.checkpoint_out(checkpoint, loop_data)

    return case


def make_fragment_files(dfloop: pd.DataFrame, edges: Dict,
                        masfile: Path) -> Dict:
    """
    """
    data = {
        'loop_length': int(dfloop.iloc[0]['loop_length']),
        'abego': list(dfloop['loop'].values),
        'edges': edges,
        'fragfiles': [],
        'match_count': 0
    }
Exemplo n.º 5
0
            case.data['metadata']['hybridize'] = reload
            return case

        # We need to check that the rosetta_scripts executable is available
        if not data['cmd']['folding'][0].is_file() or not os.access(
                str(data['cmd']['assembly'][0]), os.X_OK):
            raise NodeMissingError(
                f'Cannot find executable {data["cmd"]["assembly"][0]}')

        # Build the structure
        utils.build_template_sketch(self.log, case, wpaths['pdb'])

        # Make the Folding and Design RScripts
        data = utils.make_scripts(self.log, case, wpaths, data, self.natbias,
                                  self.layer_design)

        # Finish command
        data = utils.commands(case, self.nstruct, data, wpaths)

        # Execute
        data = utils.execute(self.log, data, wpaths)

        # Update metadata
        data = utils.update_data(self.log, data, wpaths)

        # Checkpoint save
        TButil.checkpoint_out(self.log, wpaths['checkpoint'], data)
        case.data['metadata']['funfoldes'] = data

        return case
Exemplo n.º 6
0
        # Switch depending on the fragment_protocol
        if self.protocol == 'loop_master':
            frags3, frags9, profile = self.loop_master_protocol(case, folders)
            data['files'] = (frags3, frags9)
            data['profile'] = profile
        if self.protocol == 'loopgroup_master':
            #data['files'] = self.loopgroup_master_protocol(case, folders)
            frags3, frags9, profile = self.loop_master_protocol(case, folders)
            data['files'] = (frags3, frags9)
            data['profile'] = profile

        # Store data
        case.data['metadata']['fragments'] = data

        # Checkpoint save
        TButil.checkpoint_out(self.log, checkpoint, data)
        return case

    def loop_master_protocol(self, case: Case,
                             folders: Path) -> Tuple[str, str]:
        """
        """
        lf = case['metadata.loop_fragments']
        if lf is None:
            raise NodeMissingError(
                'Data that should be loaded through loop_master is not found.')

        for i, loop in enumerate(lf):
            if i == 0:
                ff3 = parse_rosetta_fragments(loop['fragfiles'][0])
                ff9 = parse_rosetta_fragments(loop['fragfiles'][1])