Пример #1
0
    def __init__(self,
                 targetid,
                 ensembler_stage=None,
                 project_dir='.',
                 log_level=None):
        """Set up per-target model data for an Ensembler project.

        Parameters
        ----------
        targetid : str
            Target identifier; joined onto the project's models directory to
            form ``self.models_target_dir``.
        ensembler_stage : str, optional
            Refinement stage to use. If None, the first stage reported
            complete by ``check_ensembler_modeling_stage_complete`` is chosen,
            trying 'refine_explicit_md' and then 'refine_implicit_md'.
        project_dir : str, optional
            Stored unchanged as ``self.project_dir``.
        log_level : optional
            Forwarded to ``ensembler.utils.set_loglevel``.

        Raises
        ------
        Exception
            If no stage was passed and neither refinement stage is complete.
        """
        ensembler.core.check_project_toplevel_dir()
        ensembler.utils.set_loglevel(log_level)
        self.targetid = targetid
        self.models_target_dir = os.path.join(
            ensembler.core.default_project_dirnames.models, self.targetid)
        self.project_dir = project_dir

        # Always bind the attribute before auto-detection; otherwise the
        # "nothing found" check below would fail with AttributeError instead
        # of raising the intended, descriptive exception.
        self.ensembler_stage = ensembler_stage
        if self.ensembler_stage is None:
            # Prefer the most advanced refinement stage that has completed.
            for stagename in ['refine_explicit_md', 'refine_implicit_md']:
                if check_ensembler_modeling_stage_complete(
                        stagename, targetid):
                    self.ensembler_stage = stagename
                    break
            if self.ensembler_stage is None:
                raise Exception(
                    'Models have not yet been built for this Ensembler project.'
                )

        # NOTE(review): energies_filename is only set for the two refinement
        # stages; any other explicitly passed stage leaves it undefined.
        if self.ensembler_stage == 'refine_implicit_md':
            self.energies_filename = 'implicit-energies.txt'
        elif self.ensembler_stage == 'refine_explicit_md':
            self.energies_filename = 'explicit-energies.txt'

        # Run the loading steps in their original order.
        self._get_templateids_and_template_filepaths()
        self._get_unique_models()
        self._get_successful()
        self._get_final_energies()
        self._get_seqids()
Пример #2
0
    def __init__(self, targetid, ensembler_stage=None, project_dir='.', log_level=None):
        """Set up per-target model data for an Ensembler project.

        Parameters
        ----------
        targetid : str
            Target identifier; joined onto the project's models directory to
            form ``self.models_target_dir``.
        ensembler_stage : str, optional
            Refinement stage to use. If None, the first stage reported
            complete by ``check_ensembler_modeling_stage_complete`` is chosen,
            trying 'refine_explicit_md' and then 'refine_implicit_md'.
        project_dir : str, optional
            Stored unchanged as ``self.project_dir``.
        log_level : optional
            Forwarded, together with the module ``logger``, to
            ``ensembler.utils.loglevel_setter``.

        Raises
        ------
        Exception
            If no stage was passed and neither refinement stage is complete.
        """
        ensembler.core.check_project_toplevel_dir()
        ensembler.utils.loglevel_setter(logger, log_level)
        self.targetid = targetid
        self.models_target_dir = os.path.join(ensembler.core.default_project_dirnames.models, self.targetid)
        self.project_dir = project_dir

        # Always bind the attribute before auto-detection; otherwise the
        # "nothing found" check below would fail with AttributeError instead
        # of raising the intended, descriptive exception.
        self.ensembler_stage = ensembler_stage
        if self.ensembler_stage is None:
            # Prefer the most advanced refinement stage that has completed.
            for stagename in ['refine_explicit_md', 'refine_implicit_md']:
                if check_ensembler_modeling_stage_complete(stagename, targetid):
                    self.ensembler_stage = stagename
                    break
            if self.ensembler_stage is None:
                raise Exception('Models have not yet been built for this Ensembler project.')

        # NOTE(review): energies_filename is only set for the two refinement
        # stages; any other explicitly passed stage leaves it undefined.
        if self.ensembler_stage == 'refine_implicit_md':
            self.energies_filename = 'implicit-energies.txt'
        elif self.ensembler_stage == 'refine_explicit_md':
            self.energies_filename = 'explicit-energies.txt'

        # Run the loading steps in their original order.
        self._get_templateids_and_template_filepaths()
        self._get_unique_models()
        self._get_successful()
        self._get_final_energies()
        self._get_seqids()
Пример #3
0
    def __init__(self,
                 targetid,
                 ensembler_stage=None,
                 project_dir='.',
                 log_level=None):
        """Prepare model data for one target and build its trajectory.

        Parameters
        ----------
        targetid : str
            Target identifier; joined onto the project's models directory to
            form ``self.models_target_dir``.
        ensembler_stage : str, optional
            Modeling stage to use. If None, the first stage reported complete
            by ``check_ensembler_modeling_stage_complete`` is chosen, trying
            'refine_explicit_md', 'refine_implicit_md' and 'build_models' in
            that order.
        project_dir : str, optional
            Stored unchanged as ``self.project_dir``.
        log_level : optional
            Forwarded to ``ensembler.utils.set_loglevel``.

        Raises
        ------
        Exception
            If no stage was passed and none of the stages is complete.
        """
        ensembler.core.check_project_toplevel_dir()
        ensembler.utils.set_loglevel(log_level)
        self.targetid = targetid
        self.models_target_dir = os.path.join(
            ensembler.core.default_project_dirnames.models, self.targetid)
        self.project_dir = project_dir

        # Always bind the attribute before auto-detection; otherwise the
        # "nothing found" check below would fail with AttributeError instead
        # of raising the intended, descriptive exception.
        self.ensembler_stage = ensembler_stage
        if self.ensembler_stage is None:
            # Prefer the most advanced stage that has completed.
            for stagename in [
                    'refine_explicit_md', 'refine_implicit_md', 'build_models'
            ]:
                if check_ensembler_modeling_stage_complete(
                        stagename, targetid):
                    self.ensembler_stage = stagename
                    break
            if self.ensembler_stage is None:
                raise Exception(
                    'Models have not yet been built for this Ensembler project.'
                )
        # The model file name depends on which stage produced the models.
        self.model_filename = ensembler.core.model_filenames_by_ensembler_stage[
            self.ensembler_stage]

        # Run the loading and trajectory steps in their original order.
        self._get_templateids_and_model_filepaths()
        self._get_unique_models()
        self._get_seqids()
        self._store_highest_seqid_model()
        self._mk_traj()
Пример #4
0
    def __init__(self, targetid, ensembler_stage=None, project_dir='.', log_level=None):
        """Prepare model data for one target and build its trajectory.

        Parameters
        ----------
        targetid : str
            Target identifier; joined onto the project's models directory to
            form ``self.models_target_dir``.
        ensembler_stage : str, optional
            Modeling stage to use. If None, the first stage reported complete
            by ``check_ensembler_modeling_stage_complete`` is chosen, trying
            'refine_explicit_md', 'refine_implicit_md' and 'build_models' in
            that order.
        project_dir : str, optional
            Stored unchanged as ``self.project_dir``.
        log_level : optional
            Forwarded to ``ensembler.utils.set_loglevel``.

        Raises
        ------
        Exception
            If no stage was passed and none of the stages is complete.
        """
        ensembler.core.check_project_toplevel_dir()
        ensembler.utils.set_loglevel(log_level)
        self.targetid = targetid
        self.models_target_dir = os.path.join(ensembler.core.default_project_dirnames.models, self.targetid)
        self.project_dir = project_dir

        # Always bind the attribute before auto-detection; otherwise the
        # "nothing found" check below would fail with AttributeError instead
        # of raising the intended, descriptive exception.
        self.ensembler_stage = ensembler_stage
        if self.ensembler_stage is None:
            # Prefer the most advanced stage that has completed.
            for stagename in ['refine_explicit_md', 'refine_implicit_md', 'build_models']:
                if check_ensembler_modeling_stage_complete(stagename, targetid):
                    self.ensembler_stage = stagename
                    break
            if self.ensembler_stage is None:
                raise Exception('Models have not yet been built for this Ensembler project.')
        # The model file name depends on which stage produced the models.
        self.model_filename = ensembler.core.model_filenames_by_ensembler_stage[self.ensembler_stage]

        # Run the loading and trajectory steps in their original order.
        self._get_templateids_and_model_filepaths()
        self._get_unique_models()
        self._get_seqids()
        self._store_highest_seqid_model()
        self._mk_traj()
Пример #5
0
def mktraj(targetid, ensembler_stage=None, traj_filepath=None, topol_filepath=None, models_data_filepath=None, process_only_these_templates=None):
    """Makes a trajectory for a given target, using mdtraj. The trajectory can be used with other
    software, e.g. for visualization with PyMOL or VMD.

    Models are ordered by descending sequence identity, joined into a single
    trajectory, and superposed onto the first frame using the C-alpha atoms of
    residues that DSSP assigns as helix ('H') or strand ('E') in that frame.

    Parameters
    ----------
    targetid : str
        e.g. 'EGFR_HUMAN_D0'
    ensembler_stage : str
        The Ensembler stage from which to build models, e.g. 'build_models' results in a trajectory
        built from the 'model.pdb.gz' files output by the build_models command.
        options: build_models|refine_implicit_md|refine_explicit_md
        default: most advanced stage for which model files are available
    traj_filepath : str
        default: models/[targetid]/traj-[ensembler_stage].xtc
    topol_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
    models_data_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-data.csv
    process_only_these_templates : list of str
        If given (and non-empty), only these template IDs are considered;
        otherwise every subdirectory of the target's models directory whose
        name contains '_D' is used.

    Returns
    -------
    traj : mdtraj.Trajectory
    df : pandas.DataFrame
        models data (e.g. sequence identities), with columns 'templateid',
        'model_filepath' and 'seqid'

    Raises
    ------
    Exception
        If no modeling stage is complete, or if no model file for the chosen
        stage exists for any of the candidate templates.
    """
    ensembler.core.check_project_toplevel_dir()
    models_target_dir = os.path.join(ensembler.core.default_project_dirnames.models, targetid)

    logger.debug('Working on target %s', targetid)

    if ensembler_stage is None:
        # Pick the most advanced stage that has completed for this target.
        for stagename in ['refine_explicit_md', 'refine_implicit_md', 'build_models']:
            if check_ensembler_modeling_stage_complete(stagename, targetid):
                ensembler_stage = stagename
                break

    if ensembler_stage is None:
        raise Exception('Models have not yet been built for this Ensembler project.')

    if traj_filepath is None:
        traj_filepath = os.path.join(models_target_dir, 'traj-{0}.xtc'.format(ensembler_stage))
    if topol_filepath is None:
        topol_filepath = os.path.join(models_target_dir, 'traj-{0}-topol.pdb'.format(ensembler_stage))
    if models_data_filepath is None:
        models_data_filepath = os.path.join(models_target_dir, 'traj-{0}-data.csv'.format(ensembler_stage))

    if process_only_these_templates:
        templateids = process_only_these_templates
    else:
        # next() on the generator works on both Python 2 and 3, unlike the
        # Python-2-only .next() method. Only the immediate subdirectories
        # (first os.walk entry) are of interest.
        subdirnames = next(os.walk(models_target_dir))[1]
        templateids = [dirname for dirname in subdirnames if '_D' in dirname]

    model_filename = ensembler.core.model_filenames_by_ensembler_stage[ensembler_stage]

    # Keep only templates for which the model file of this stage exists, and
    # read each one's sequence identity (None when the file is missing).
    valid_model_templateids = []
    valid_model_filepaths = []
    seqids = []
    for templateid in templateids:
        model_filepath = os.path.join(models_target_dir, templateid, model_filename)
        if not os.path.exists(model_filepath):
            continue
        valid_model_templateids.append(templateid)
        valid_model_filepaths.append(model_filepath)

        seqid_filepath = os.path.join(models_target_dir, templateid, 'sequence-identity.txt')
        if os.path.exists(seqid_filepath):
            # Context manager closes the handle promptly.
            with open(seqid_filepath) as seqid_file:
                seqids.append(float(seqid_file.read().strip()))
        else:
            seqids.append(None)

    if not valid_model_filepaths:
        # Fail early with a clear message rather than an opaque lookup error
        # on the empty DataFrame further down; nothing is written to disk.
        raise Exception(
            'No {0} model files found for target {1}.'.format(model_filename, targetid)
        )

    df = pd.DataFrame({
        'templateid': valid_model_templateids,
        'model_filepath': valid_model_filepaths,
        'seqid': seqids,
    })
    # DataFrame.sort(columns=...) was removed from pandas; sort_values is the
    # supported equivalent.
    df.sort_values(by='seqid', inplace=True, ascending=False)
    df.reset_index(drop=True, inplace=True)

    df.to_csv(models_data_filepath, columns=['templateid', 'seqid'])

    # construct traj
    traj = mdtraj.load_pdb(df.model_filepath[0])
    for model_filepath in df.model_filepath[1:]:
        traj += mdtraj.load_pdb(model_filepath)

    # superpose structured C-alphas
    dssp = mdtraj.compute_dssp(traj[0])[0]
    structured_resis_bool = (dssp == 'H') | (dssp == 'E')
    alpha_indices = traj.topology.select_atom_indices('alpha')
    # NOTE(review): indexing alpha_indices by residue number assumes one
    # C-alpha per residue - confirm for models with non-protein residues.
    structured_alpha_indices = np.array([alpha_indices[x] for x in range(traj.n_residues) if structured_resis_bool[x]])
    traj.superpose(reference=traj, frame=0, atom_indices=structured_alpha_indices)

    # write traj, and write first frame as pdb file
    traj[0].save(topol_filepath)
    traj.save(traj_filepath)
    return traj, df