Пример #1
0
def fetch_pdb(pdbid):
    """Download the PDB-format file for the given PDB ID from the RCSB server.

    The ID is lower-cased before it is used in the download URL. The process
    exits with status 1 if the server answers with an HTTP error or if the
    returned text contains the word 'sorry' (the server's placeholder page for
    entries without a PDB-format file).

    :param pdbid: PDB ID of the entry to download
    :return: list ``[pdbfile, pdbid]`` with the decoded file content and the
        lower-cased PDB ID
    """
    pdbid = pdbid.lower()
    logger.info(f'checking status of PDB-ID {pdbid}')
    # @todo re-implement state check with new RCSB API, see https://www.rcsb.org/news?year=2020&article=5eb18ccfd62245129947212a&feature=true
    # state, current_entry = check_pdb_status(pdbid)  # Get state and current PDB ID
    #
    # if state == 'OBSOLETE':
    #     logger.info(f'entry is obsolete, getting {current_entry} instead')
    # elif state == 'CURRENT':
    #     logger.info('entry is up-to-date')
    # elif state == 'UNKNOWN':
    #     logger.error('invalid PDB-ID (entry does not exist on PDB server)')
    #     sys.exit(1)
    logger.info('downloading file from PDB')
    # get URL for current entry
    # @todo needs update to react properly on response codes of RCSB servers
    pdburl = f'http://www.rcsb.org/pdb/files/{pdbid}.pdb'
    try:
        # The context manager closes the HTTP response even if reading or
        # decoding fails, so the connection is not leaked.
        with urlopen(pdburl) as response:
            pdbfile = response.read().decode()
    except HTTPError:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID')
        sys.exit(1)
    # If no PDB file is available, a text is now shown with "We're sorry, but ..."
    # Could previously be distinguished by an HTTP error
    if 'sorry' in pdbfile:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID.')
        sys.exit(1)
    return [pdbfile, pdbid]
Пример #2
0
def run_analysis(inputstructs, inputpdbids):
    """Main function. Calls functions for processing, report generation and visualization.

    Exactly one of the two arguments is expected to be set: if ``inputstructs``
    is not None the given structures are processed, otherwise the structures
    for ``inputpdbids`` are downloaded first and then processed.

    :param inputstructs: list of PDB file paths, or None; the entry '-' means
        that the structure is read from stdin
    :param inputpdbids: list of PDB IDs to download; only used when
        ``inputstructs`` is None
    """
    pdbid, pdbpath = None, None
    # @todo For multiprocessing, implement better stacktracing for errors
    # Print title and version
    logger.info(f'Protein-Ligand Interaction Profiler (PLIP) {__version__}')
    logger.info(f'brought to you by: {config.__maintainer__}')
    logger.info('please cite: https://www.doi.org/10.1093/nar/gkv315')
    output_prefix = config.OUTPUTFILENAME

    if inputstructs is not None:  # Process PDB file(s)
        # Counted before de-duplication, as in the original implementation
        num_structures = len(inputstructs)
        inputstructs = remove_duplicates(inputstructs)
        for inputstruct in inputstructs:
            # Determined per structure: a flag carried over from an earlier
            # '-' entry would make the following file paths be handed to
            # process_pdb as if they were structure strings.
            read_from_stdin = inputstruct == '-'
            if read_from_stdin:
                inputstruct = sys.stdin.read()
                if config.RAWSTRING:
                    # Interpret escape sequences (e.g. a literal '\n') in the raw input
                    inputstruct = bytes(inputstruct,
                                        'utf8').decode('unicode_escape')
            else:
                if os.path.getsize(inputstruct) == 0:
                    logger.error('empty PDB file')
                    sys.exit(1)
                if num_structures > 1:
                    # One output folder per structure, named after the file
                    # without directory and extension. splitext/basename also
                    # cope with names that contain no or several dots.
                    basename = os.path.splitext(
                        os.path.basename(inputstruct))[0]
                    config.OUTPATH = '/'.join([config.BASEPATH, basename])
                    output_prefix = 'report'
            process_pdb(inputstruct,
                        config.OUTPATH,
                        as_string=read_from_stdin,
                        outputprefix=output_prefix)
    else:  # Try to fetch the current PDB structure(s) directly from the RCSB server
        num_pdbids = len(inputpdbids)
        inputpdbids = remove_duplicates(inputpdbids)
        for inputpdbid in inputpdbids:
            pdbpath, pdbid = download_structure(inputpdbid)
            if num_pdbids > 1:
                # Batch mode: <BASEPATH>/<2nd+3rd ID character>/<ID>
                config.OUTPATH = '/'.join(
                    [config.BASEPATH, pdbid[1:3].upper(),
                     pdbid.upper()])
                output_prefix = 'report'
            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)

    if (pdbid is not None
            or inputstructs is not None) and config.BASEPATH is not None:
        if config.BASEPATH in ['.', './']:
            logger.info(
                'finished analysis, find the result files in the working directory'
            )
        else:
            logger.info(
                f'finished analysis, find the result files in {config.BASEPATH}'
            )
Пример #3
0
def download_structure(inputpdbid):
    """Download the structure for a PDB ID and store it below config.BASEPATH.

    The ID has to be exactly four characters long and must not be resolved to
    'UnknownProtein' by extract_pdbid; otherwise an error is logged and the
    process exits with status 1. A ValueError raised while fetching or writing
    the file is reported as a non-existing PDB ID and also exits with status 1.

    Returns the tuple (pdbpath, pdbid): the path of the written file and the
    PDB ID as returned by fetch_pdb.
    """
    try:
        has_valid_format = (len(inputpdbid) == 4 and
                            extract_pdbid(inputpdbid.lower()) != 'UnknownProtein')
        if not has_valid_format:
            logger.error(f'invalid PDB-ID (wrong format): {inputpdbid}')
            sys.exit(1)

        pdbfile, pdbid = fetch_pdb(inputpdbid.lower())

        # Build the target path before making sure the base folder exists
        base_dir = config.BASEPATH.rstrip('/')
        pdbpath = tilde_expansion('%s/%s.pdb' % (base_dir, pdbid))
        create_folder_if_not_exists(config.BASEPATH)
        with open(pdbpath, 'w') as target:
            target.write(pdbfile)

        logger.info(f'file downloaded as {pdbpath}')
        return pdbpath, pdbid
    except ValueError:  # Invalid PDB ID, cannot fetch from RCSB server
        logger.error(f'PDB-ID does not exist: {inputpdbid}')
        sys.exit(1)
Пример #4
0
def readmol(path, as_string=False):
    """Read a molecule with Pybel and return it together with its file format.

    :param path: path of the molecule file, or the file content itself when
        ``as_string`` is true
    :param as_string: treat ``path`` as the molecule data instead of a file name
    :return: tuple ``(molecule, format)`` with the Pybel molecule (bond orders
        perceived) and the format name used for reading

    Exits with status 1 if the input cannot be read as one of the supported
    formats or contains no molecule.
    """
    supported_formats = ['pdb']

    # Windows-generated input: strip carriage returns from string input
    if "\r" in path and as_string:
        path = path.replace('\r', '')

    for sformat in supported_formats:
        converter = pybel.ob.OBConversion()
        converter.SetInFormat(sformat)
        logger.debug(
            f'detected {sformat} as format, trying to read file with OpenBabel'
        )

        # Read molecules with single bond information
        if not as_string:
            molecules = pybel.readfile(format=sformat,
                                       filename=path,
                                       opt={"s": None})
            try:
                molecule = next(molecules)
            except StopIteration:
                logger.error('file contains no valid molecules')
                sys.exit(1)
        else:
            try:
                molecule = pybel.readstring(sformat, path)
            except IOError:
                logger.error('no valid file format provided')
                sys.exit(1)

        logger.debug('molecule successfully read')

        # Assign multiple bonds
        molecule.OBMol.PerceiveBondOrders()
        return molecule, sformat

    # Only reached when the list of supported formats is empty
    logger.error('no valid file format provided')
    sys.exit(1)
Пример #5
0
def main():
    """Parse command line arguments and start main script for analysis.

    The parsed options are stored in the global ``config`` module, the log
    level is derived from the verbosity flags and user-supplied thresholds are
    validated before ``run_analysis`` is started. Invalid threshold values are
    reported through ``parser.error``, which terminates the program. If
    ``--pics`` or ``--pymol`` is requested but PyMOL cannot be imported, the
    process exits with status 1.
    """
    parser = ArgumentParser(prog="PLIP", description=description)
    pdbstructure = parser.add_mutually_exclusive_group(
        required=True)  # Needs either PDB ID or file
    # '-' as file name reads from stdin
    pdbstructure.add_argument("-f",
                              "--file",
                              dest="input",
                              nargs="+",
                              help="Set input file, '-' reads from stdin")
    pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+")
    outputgroup = parser.add_mutually_exclusive_group(
        required=False)  # Needs either outpath or stdout
    outputgroup.add_argument("-o", "--out", dest="outpath", default="./")
    outputgroup.add_argument("-O",
                             "--stdout",
                             dest="stdout",
                             action="store_true",
                             default=False,
                             help="Write to stdout instead of file")
    parser.add_argument("--rawstring",
                        dest="use_raw_string",
                        default=False,
                        action="store_true",
                        help="Use Python raw strings for stdin")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        default=False,
                        help="Turn on verbose mode",
                        action="store_true")
    parser.add_argument("-q",
                        "--quiet",
                        dest="quiet",
                        default=False,
                        help="Turn on quiet mode",
                        action="store_true")
    parser.add_argument("-s",
                        "--silent",
                        dest="silent",
                        default=False,
                        help="Turn on silent mode",
                        action="store_true")
    parser.add_argument("-p",
                        "--pics",
                        dest="pics",
                        default=False,
                        help="Additional pictures",
                        action="store_true")
    parser.add_argument("-x",
                        "--xml",
                        dest="xml",
                        default=False,
                        help="Generate report file in XML format",
                        action="store_true")
    parser.add_argument("-t",
                        "--txt",
                        dest="txt",
                        default=False,
                        help="Generate report file in TXT (RST) format",
                        action="store_true")
    parser.add_argument("-y",
                        "--pymol",
                        dest="pymol",
                        default=False,
                        help="Additional PyMOL session files",
                        action="store_true")
    parser.add_argument(
        "--maxthreads",
        dest="maxthreads",
        default=multiprocessing.cpu_count(),
        help=
        "Set maximum number of main threads (number of binding sites processed simultaneously). "
        "If not set, PLIP uses all available CPUs if possible.",
        type=int)
    parser.add_argument(
        "--breakcomposite",
        dest="breakcomposite",
        default=False,
        help=
        "Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.",
        action="store_true")
    parser.add_argument(
        "--altlocation",
        dest="altlocation",
        default=False,
        help=
        "Also consider alternate locations for atoms (e.g. alternate conformations).",
        action="store_true")
    parser.add_argument("--nofix",
                        dest="nofix",
                        default=False,
                        help="Turns off fixing of PDB files.",
                        action="store_true")
    parser.add_argument("--nofixfile",
                        dest="nofixfile",
                        default=False,
                        help="Turns off writing files for fixed PDB files.",
                        action="store_true")
    parser.add_argument(
        "--nopdbcanmap",
        dest="nopdbcanmap",
        default=False,
        help=
        "Turns off calculation of mapping between canonical and PDB atom order for ligands.",
        action="store_true")
    parser.add_argument(
        "--dnareceptor",
        dest="dnareceptor",
        default=False,
        help=
        "Treat nucleic acids as part of the receptor structure (together with any present protein) instead of as a ligand.",
        action="store_true")
    parser.add_argument(
        "--name",
        dest="outputfilename",
        default="report",
        help=
        "Set a filename for the report TXT and XML files. Will only work when processing single structures."
    )
    ligandtype = parser.add_mutually_exclusive_group(
    )  # Either peptide/inter or intra mode
    ligandtype.add_argument(
        "--peptides",
        "--inter",
        dest="peptides",
        default=[],
        help=
        "Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
        nargs="+")
    ligandtype.add_argument(
        "--intra",
        dest="intra",
        help="Allows to define one chain to analyze intra-chain contacts.")
    parser.add_argument("--keepmod",
                        dest="keepmod",
                        default=False,
                        help="Keep modified residues as ligands",
                        action="store_true")
    parser.add_argument(
        "--nohydro",
        dest="nohydro",
        default=False,
        help=
        "Do not add polar hydrogens in case your structure already contains hydrogens.",
        action="store_true")
    parser.add_argument(
        "--model",
        dest="model",
        default=1,
        type=int,
        help="Model number to be used for multi-model structures.")
    # Optional threshold arguments, not shown in help
    thr = namedtuple('threshold', 'name type')
    thresholds = [
        thr(name='aromatic_planarity', type='angle'),
        thr(name='hydroph_dist_max', type='distance'),
        thr(name='hbond_dist_max', type='distance'),
        thr(name='hbond_don_angle_min', type='angle'),
        thr(name='pistack_dist_max', type='distance'),
        thr(name='pistack_ang_dev', type='other'),
        thr(name='pistack_offset_max', type='distance'),
        thr(name='pication_dist_max', type='distance'),
        thr(name='saltbridge_dist_max', type='distance'),
        thr(name='halogen_dist_max', type='distance'),
        thr(name='halogen_acc_angle', type='angle'),
        thr(name='halogen_don_angle', type='angle'),
        thr(name='halogen_angle_dev', type='other'),
        thr(name='water_bridge_mindist', type='distance'),
        thr(name='water_bridge_maxdist', type='distance'),
        thr(name='water_bridge_omega_min', type='angle'),
        thr(name='water_bridge_omega_max', type='angle'),
        thr(name='water_bridge_theta_min', type='angle')
    ]
    for t in thresholds:
        parser.add_argument('--%s' % t.name,
                            dest=t.name,
                            type=lambda val: threshold_limiter(parser, val),
                            help=argparse.SUPPRESS)
    arguments = parser.parse_args()
    # configure log levels; the store_true flags are already booleans
    config.VERBOSE = arguments.verbose
    config.QUIET = arguments.quiet
    config.SILENT = arguments.silent
    # Precedence if several flags are given: verbose > quiet > silent
    if config.VERBOSE:
        logger.setLevel(logging.DEBUG)
    elif config.QUIET:
        logger.setLevel(logging.WARN)
    elif config.SILENT:
        logger.setLevel(logging.CRITICAL)
    else:
        logger.setLevel(config.DEFAULT_LOG_LEVEL)
    config.MAXTHREADS = arguments.maxthreads
    config.XML = arguments.xml
    config.TXT = arguments.txt
    config.PICS = arguments.pics
    config.PYMOL = arguments.pymol
    config.STDOUT = arguments.stdout
    config.RAWSTRING = arguments.use_raw_string
    # The output path always ends with a slash
    outpath = arguments.outpath
    if not outpath.endswith('/'):
        outpath = "".join([outpath, '/'])
    config.OUTPATH = tilde_expansion(outpath)
    config.BASEPATH = config.OUTPATH  # Used for batch processing
    config.BREAKCOMPOSITE = arguments.breakcomposite
    config.ALTLOC = arguments.altlocation
    config.PEPTIDES = arguments.peptides
    config.INTRA = arguments.intra
    config.NOFIX = arguments.nofix
    config.NOFIXFILE = arguments.nofixfile
    config.NOPDBCANMAP = arguments.nopdbcanmap
    config.KEEPMOD = arguments.keepmod
    config.DNARECEPTOR = arguments.dnareceptor
    config.OUTPUTFILENAME = arguments.outputfilename
    config.NOHYDRO = arguments.nohydro
    config.MODEL = arguments.model
    # Make sure we have pymol with --pics and --pymol
    if config.PICS or config.PYMOL:
        try:
            import pymol  # noqa: F401 -- imported only to check availability
        except ImportError:
            logger.error('PyMOL is required for the --pics and --pymol option')
            sys.exit(1)
    # Assign values to global thresholds
    for t in thresholds:
        tvalue = getattr(arguments, t.name)
        if tvalue is not None:
            if t.type == 'angle' and not 0 < tvalue < 180:  # Check value for angle thresholds
                parser.error(
                    "Threshold for angles need to have values within 0 and 180."
                )
            if t.type == 'distance':
                if tvalue > 10:  # Check value for distance thresholds
                    parser.error(
                        "Threshold for distances must not be larger than 10 Angstrom."
                    )
                elif tvalue > config.BS_DIST + 1:  # Dynamically adapt the search space for binding site residues
                    config.BS_DIST = tvalue + 1
            setattr(config, t.name.upper(), tvalue)
    # Check additional conditions for interdependent thresholds
    if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen acceptor angle has to be larger than the halogen angle deviation."
        )
    if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen donor angle has to be larger than the halogen angle deviation."
        )
    if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST:
        parser.error(
            "The water bridge minimum distance has to be smaller than the water bridge maximum distance."
        )
    if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
        parser.error(
            "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle"
        )
    # arguments.input is a list (nargs="+"), while tilde_expansion is applied
    # to single path strings elsewhere in this file, so expand each path
    # individually instead of passing the whole list.
    if arguments.input is not None:
        expanded_paths = [tilde_expansion(path) for path in arguments.input]
    else:
        expanded_paths = None
    run_analysis(expanded_paths, arguments.pdbid)  # Start main script