示例#1
0
def main():
    """Predict an output image for one input image with a pickled model.

    Inputs come from the command line: ``sys.argv[1]`` is the image file,
    while ``getarg(2, 0)`` and ``getarg(3, ...)`` supply the radius ``r`` and
    the model file (presumably "argument index, fallback default" -- confirm
    against ``getarg``).

    The model's prediction is reshaped to ``(h - 2 * r, w - 2 * r)``, passed
    through ``denormalize``, cast to ``uint8`` and saved in a folder derived
    from the model path (``.p`` removed, ``data/`` replaced by ``out/``),
    using the base name of the input image.
    """
    f = sys.argv[1]
    r = int(getarg(2, 0))
    inname = getarg(3, "data/r%d_lin.p" % (r))

    print("Test with radius=%d and file %s and model %s" % (r, f, inname))

    # SECURITY NOTE: unpickling executes code stored in the file, so the
    # model path must only ever point at a trusted file.
    # The context manager closes the handle deterministically.
    with open(inname, "rb") as model_file:
        model = pickle.load(model_file)

    # Only the dimensions are needed here, so release the image file at once.
    with Image.open(f) as src_img:
        w, h = src_img.size
    X = extract_x([f], r)

    # The prediction covers the image minus a border of r pixels per side.
    dim = (h - 2 * r, w - 2 * r)
    y = model.predict(X)
    y = np.reshape(y, dim)
    y = denormalize(y)

    # Warn only; the image is still written below.
    if np.any(np.isnan(y)):
        print("result of", f, "has nan values after denormalizing")

    img = Image.fromarray(y.astype(np.uint8))
    # NOTE(review): replaces every ".p" / "data/" occurrence in the model
    # path, not just the suffix / leading folder.
    outfold = str.replace(str.replace(inname, ".p", ""), "data/", "out/")
    outf = path.join(outfold, path.basename(f))
    mkdir_p(outfold)
    img.save(outf)
示例#2
0
    def run(cls, fragment_file, ligand_chain, ligand_sequence, pdbid):
        """
        Convert a Rosetta fragment file into per-position PDB fragment files.

        Each kept fragment is written as ``frag_NNN.pdb`` in a directory named
        after its position, next to ``fragment_file``.  Coordinates come from
        the fragment file; residue names come from ``ligand_sequence`` and
        residues are numbered starting at the fragment's position.  The window
        table built by ``shared.create_windows`` is saved as
        ``<pdbid>_data.csv`` in the same directory.

        If the highest kept position is not of the form ``6k + 1``, its
        fragments are re-saved under a directory named after that window's
        ``res_start`` with the residues selected by
        ``[:res_start - position]`` removed, and the old directory is deleted.

        :param fragment_file: path to the Rosetta fragment file
        :param ligand_chain: chain identifier written to every output row
        :param ligand_sequence: FASTA file holding a single ligand sequence
        :param pdbid: identifier used in the name of the window CSV
        :raises MakePdbError: if a fragment PDB being truncated holds more
            than one model or more than one chain
        :raises IndexError: if a fragment extends past the ligand sequence,
            or if no position was kept at all
        """
        # Local import keeps this method self-contained.
        import itertools

        base_dir = os.path.dirname(fragment_file)

        ligand_sequence = SeqIO.read(ligand_sequence, "fasta")

        windowdf = pd.DataFrame(shared.create_windows(len(ligand_sequence)))
        pos_list = windowdf['position'].drop_duplicates().tolist()
        windowdf.to_csv(os.path.join(base_dir, "{0}_data.csv".format(pdbid)),
                        index=False)

        # Parenthesised single argument prints identically on Python 2 and 3
        print(ligand_sequence)

        ### Convert Rosetta format to PDB format
        # Maps each kept position to the list of fragment files written for it
        pos_file_dict = dict()
        with open(fragment_file, "r") as ih:
            position = None
            src_pdbid = None
            # Defined up front so that blank or data lines appearing before
            # the first "position" header are skipped rather than raising
            # UnboundLocalError.
            keep_position = False
            rows = list()
            # The trailing empty string acts as a final blank line so the
            # last fragment is flushed even when the file does not end with
            # one.
            for line in itertools.chain(ih, ("",)):
                parts = line.split()
                if line[:8] == "position":
                    position = parts[1]
                    position_path = os.path.join(base_dir, position)
                    position = int(position)
                    # Only positions present in the window table are kept
                    keep_position = position in pos_list
                    if keep_position:
                        pos_file_dict[position] = list()
                        if not os.path.isdir(position_path):
                            os.mkdir(position_path)
                        logging.debug("Rosetta to CA for %s", position_path)
                        # Fragment counter restarts for every kept position
                        index = 1
                # Parts is an empty list if line is blank
                elif not parts:
                    # A blank line terminates the current fragment
                    if keep_position and rows:
                        assert src_pdbid
                        filepath = os.path.join(position_path,
                                                "frag_%03d.pdb" % index)
                        pos_file_dict[position].append(filepath)
                        # Write only when shared.missing reports the file as
                        # missing; the path is recorded either way
                        if shared.missing(filepath):
                            with open(filepath, "w") as oh:
                                oh.writelines(rows)
                        rows = list()
                        index += 1
                    src_pdbid = None
                elif keep_position:
                    # A data row must have exactly these 11 fields
                    pdbcode, pdbchain, resi, resn, ss, phi, psi, omega, x, y, z = parts
                    new_pdbid = pdbcode + pdbchain
                    if not rows:
                        # First row of a fragment: emit the header and start
                        # numbering residues at the current position
                        src_pdbid = new_pdbid
                        rows.append(cls.header_fmt % src_pdbid)
                        res_id = position
                    fmt_list = list(cls.pdb_default)
                    query_idx = res_id - 1
                    assert query_idx >= 0
                    try:
                        query_resn = ligand_sequence[query_idx]
                    except IndexError:
                        # Same text as "print position, index, res_id"
                        print("%s %s %s" % (position, index, res_id))
                        raise
                    real_res_id = res_id
                    fmt_list[1] = real_res_id
                    # Residue name is taken from the ligand sequence, not
                    # from the fragment's source structure
                    fmt_list[4] = shared.one_to_three[query_resn]
                    fmt_list[5] = ligand_chain
                    fmt_list[6] = real_res_id
                    fmt_list[8] = x
                    fmt_list[9] = y
                    fmt_list[10] = z
                    rows.append(cls.pdb_fmt % tuple(fmt_list))
                    res_id += 1

        all_pos = sorted(pos_file_dict, key=int)
        last_pos = all_pos[-1]

        # Truncate last pos if necessary
        # 1, 7, 13, 19, 25, ... are starts
        if last_pos % 6 != 1:
            parser = PDB.PDBParser(QUIET=True)
            io = PDB.PDBIO()
            # Get computed position from database
            new_start = windowdf[windowdf['position'] ==
                                 last_pos]['res_start'].tolist()[0]
            assert new_start % 6 == 1
            last_pos_dir = os.path.dirname(pos_file_dict[last_pos][0])
            # New directory sits beside the old one, named after new_start
            new_dir = os.path.join(
                os.path.dirname(os.path.normpath(last_pos_dir)),
                "{0:.0f}".format(new_start))
            logging.debug("Changing position %s to start at %s", last_pos,
                          new_start)
            shared.mkdir_p(new_dir)
            # ADD NEW DIR TO DICT
            pos_file_dict[new_start] = list()
            # Selects the residues to drop from each fragment
            residue_remove_slice = slice(new_start - last_pos)
            for fn in pos_file_dict.pop(last_pos):
                structure = parser.get_structure("fragment", fn)
                if len(structure.child_list) != 1:
                    raise MakePdbError("More than one model in %s" % fn)
                model = structure.child_list[0]
                if len(model.child_list) != 1:
                    raise MakePdbError("More than one chain in %s" % fn)
                chain = model[ligand_chain]
                # get_list()[...] yields a new list, so detaching while
                # looping does not disturb the iteration
                for del_res in chain.get_list()[residue_remove_slice]:
                    chain.detach_child(del_res.id)
                basename = os.path.basename(fn)
                outfile = os.path.join(new_dir, basename)
                io.set_structure(structure)
                io.save(outfile)
                pos_file_dict[new_start].append(outfile)
            # The untruncated fragments are no longer needed
            shutil.rmtree(last_pos_dir)
示例#3
0
    def select_paths(self,
                     complexname,
                     receptor_chain,
                     ligand_chain,
                     nwindows,
                     ct,
                     dest=None):
        """
        Rank cluster medoid paths by a weighted z-score and combine the best.

        Medoid rows are read from ``path_<pdbid>_all.db`` and joined with the
        occupancy scores in ``<pdbid><nwindows>_receptor_occupancy.csv``.
        Every score listed in ``neco_scores`` is turned into a z-score
        (negated first when its ``ascending`` flag is false), the per-row
        minimum z-score becomes ``best_score``, and ``weighted_score`` blends
        that minimum (``b_weight``) with the ``neco_weights``-weighted sum of
        z-scores.  The ``ct`` rows with the lowest weighted score are written
        to ``path_scores.csv`` in ``dest`` and handed to
        ``self.combine_paths``.

        :param complexname: complex identifier, first part of the pdbid
        :param receptor_chain: receptor chain(s), second part of the pdbid
        :param ligand_chain: ligand chain, last part of the pdbid
        :param nwindows: number of windows; selects the tables and columns
        :param ct: number of top-ranked paths to keep
        :param dest: output directory; relative values are placed under
            ``self.working_dir``; defaults to ``<pdbid><nwindows>`` there.
            The final path is lower-cased.
        :raises SelectPathsError: if the occupancy file is missing or any
            score is null
        """
        pdb_kwargs = {
            "complexname": complexname,
            "receptor_chain": receptor_chain,
            "ligand_chain": ligand_chain,
            "nwindows": nwindows,
        }
        pdbid = "{complexname}{receptor_chain}{ligand_chain}".format(
            **pdb_kwargs)
        pdbwindowid = "{0}{nwindows}".format(pdbid, **pdb_kwargs)

        # Resolve the top directory for this pdbid (same role as the
        # directory argument to the constructor)
        if dest is None:
            # Default: pdbid plus window count under the working directory
            dest = os.path.join(self.working_dir, pdbwindowid)
        elif not os.path.isabs(dest):
            # Relative destinations live under the model db directory
            dest = os.path.join(self.working_dir, dest)

        # charmm balks at mixed case
        dest = dest.lower()
        shared.mkdir_p(dest)

        path_db = os.path.join(self.working_dir,
                               "path_{0}_all.db".format(pdbid))
        windows = ["window%s" % i for i in range(nwindows)]
        window_columns = ", ".join(windows)
        center_q = """SELECT
        pathsid, nodescore, edgescores, clustersize,
        {windows}
        FROM clusters{nwindows}
        JOIN paths{nwindows} USING (pathsid)
        WHERE is_medoid=1
        """.format(nwindows=nwindows, windows=window_columns)

        center_df = shared.db_to_pandas(center_q, path_db)

        occupancy_file = os.path.join(
            self.working_dir,
            "{0}_receptor_occupancy.csv".format(pdbwindowid))
        if shared.missing(occupancy_file):
            logging.warning("%s missing", occupancy_file)
            raise SelectPathsError("No occupancy score")
        # Load the occupancy scores and align the key column name so the
        # merge below joins on pathsid
        occ_data = pd.read_csv(occupancy_file)
        occ_data.rename(columns={"pathid": "pathsid"}, inplace=True)
        center_df = center_df.merge(occ_data, how="left")
        null_rows = center_df[center_df.isnull().any(axis=1)]
        if not null_rows.empty:
            print(null_rows)
            raise SelectPathsError("Null scores")

        for scorename, ascending in neco_scores:
            if pd.isnull(center_df[scorename]).any():
                logging.error("%s %s", pdbid, scorename)
                raise SelectPathsError("Null values")
            # Flip descending scores so that every z-score sorts the same way
            sign = 1 if ascending else -1
            center_df[scorename + "z"] = self.zscore(center_df[scorename] *
                                                     sign)

        z_columns = [name + "z" for name, __ in neco_scores]

        def best_of(row):
            return min(row[col] for col in z_columns)

        center_df["best_score"] = center_df.apply(best_of, axis=1)

        # Weighted score: b_weight on the best z-score, the remainder on the
        # weighted sum of all z-scores
        notb_weight = 1 - b_weight
        weighted_columns = [(wght, name + "z")
                            for wght, (name, __) in zip(neco_weights,
                                                        neco_scores)]

        def score_row(r):
            weighted_sum = sum(wght * r[col] for wght, col in weighted_columns)
            return b_weight * r['best_score'] + notb_weight * weighted_sum

        center_df['weighted_score'] = center_df.apply(score_row, axis=1)

        # Keep the ct rows with the lowest weighted score
        ranked = center_df.sort_values('weighted_score')
        top_n = ranked.head(ct)
        report_columns = [
            'pathsid', 'nodescorez', 'edgescoresz', 'clustersizez',
            'occupancyscorez', 'best_score', 'weighted_score'
        ]
        top_n[report_columns].to_csv(os.path.join(dest, "path_scores.csv"),
                                     index=False)
        paths = top_n.loc[:, ['pathsid'] + windows]

        model_db_file = os.path.join(self.working_dir,
                                     "scores_{0}.db".format(pdbid))

        self.combine_paths(paths=paths,
                           model_db_file=model_db_file,
                           dest=dest,
                           **pdb_kwargs)
示例#4
0
        def merge_path(s):
            """
            Create the subdirectory and merged PDB file for one path.

            The per-window fragment chains are renumbered by their window
            start, then merged residue by residue into a new ligand chain: a
            residue number found in one chain is copied as is, one found in
            two chains gets the average of the two residues' atom
            coordinates.  The receptor chains are added to the same model.

            :param s: row-like object indexed by ``'pathsid'`` and by every
                name in ``window_vars`` (the per-window fragment files)
            :returns: path of ``<struct_name>.pdb`` in the path's
                subdirectory of ``top_dir``
            :raises SelectPathsError: if a residue number in the covered
                range occurs in no chain or in more than two chains
            """
            # Create subdirectories for pathsid
            pathsid = s['pathsid']
            subdir = os.path.join(top_dir, str(pathsid))
            shared.mkdir_p(subdir)
            outfile = os.path.join(subdir, "%s.pdb" % struct_name)
            # Nothing to do when shared.missing says the output is present
            if not shared.missing(outfile):
                return outfile
            files = [s[w] for w in window_vars]
            structures = [get_structure(f) for f in files]
            chains = [struc[model_id][ligand_chain] for struc in structures]

            for s_start, c in zip(window_starts, chains):
                # Collect all residues (not modifying chain)
                r_list = [r for r in c]
                # Remove and re-number all residues
                for r in r_list:
                    c.detach_child(r.id)
                    cur_id = list(r.id)
                    # Shift the residue number by the window start
                    cur_id[1] += s_start - 1
                    r.id = tuple(cur_id)
                # Re-add residues to empty chain
                for r in r_list:
                    c.add(r)

            starts = [c.child_list[0].id[1] for c in chains]
            ends = [c.child_list[-1].id[1] for c in chains]

            sb = PDB.StructureBuilder.StructureBuilder()
            sb.init_structure(struct_name)
            sb.init_model(model_id)
            sb.init_seg('    ')
            # Create empty ligand chain
            sb.init_chain(ligand_chain)
            new_struct = sb.get_structure()
            # Add receptor chains
            for ch in receptor_chain:
                new_struct[model_id].add(receptor_model[ch])

            new_chain = new_struct[model_id][ligand_chain]

            # range (not xrange) so the loop runs on Python 2 and Python 3
            for x in range(min(starts), max(ends) + 1):
                # Retrieve all residues with id 'x'
                residues = [c[x] for c in chains if x in c]
                n_res = len(residues)
                if n_res == 1:
                    # Unpack single item
                    res, = residues
                    new_chain.add(res)
                elif n_res == 2:
                    # Combined gets averaged position of two residues
                    res1, res2 = residues
                    new_res = res1.copy()
                    for atom1 in res1:
                        atomname = atom1.name
                        atom2 = res2[atomname]
                        new_atom = new_res[atomname]
                        coord1 = atom1.coord
                        coord2 = atom2.coord
                        avg_coord = (coord1 + coord2) / 2.0
                        new_atom.set_coord(avg_coord)
                    new_chain.add(new_res)
                else:
                    # Interpolate here: exception constructors do not apply
                    # %-formatting to their arguments
                    raise SelectPathsError("%s residues at %s" % (n_res, x))

            io.set_structure(new_struct)
            io.save(outfile)
            return outfile
    def cluster_models(self,
                       modelcoord_dict,
                       chain,
                       groupid,
                       cutoff=None,
                       wd=None,
                       cleanup=True):
        """
        Cluster models.

        Writes one CA-only PDB file per path, runs the external clustering
        binary ``self.clust_bin`` on the list of those files and tags every
        parsed result row with the ``pathsid`` of its model file.

        Note: the process working directory is changed to ``wd`` and is not
        restored afterwards.

        :param modelcoord_dict: sequences of (x, y, z) coordinates to cluster,
            keyed by pathsid
        :type modelcoord_dict: dict
        :param chain: chain to keep in model (not used by this method)
        :param groupid: group identifier
        :type groupid: int
        :param cutoff: value passed to the clustering binary's ``-c`` option;
            defaults to ``self.default_cutoff``
        :param wd: working directory; defaults to ``self.default_wd``
        :param cleanup: whether to remove merged files
        :type cleanup: bool

        :returns: list of dict
        :raises ClusterPdbError: if the clustering binary exits non-zero
        """
        if cutoff is None:
            cutoff = self.default_cutoff
        if wd is None:
            wd = self.default_wd
        # Process-wide side effect, left in place because callers may rely
        # on the working directory being wd after this call.
        os.chdir(wd)
        ligand_list = os.path.join(wd, "ligand_list_%s.txt" % groupid)
        cluster_err = os.path.join(wd, "clusters_%s_out.txt" % groupid)
        # Column ruler and sample PDB ATOM records for reference:
        #          1         2         3         4         5         6         7         8
        # 12345678901234567890123456789012345678901234567890123456789012345678901234567890
        # ATOM    145  N   VAL A  25      32.433  16.336  57.540  1.00 11.92      A1   N
        # ATOM    146  CA  VAL A  25      31.132  16.439  58.160  1.00 11.85      A1   C
        pdb_line = "ATOM  {index:5d}  CA  UNK A{index:4d}    {x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00           C  \n"

        outdir = os.path.join(wd, "{0}merged".format(groupid))
        shared.mkdir_p(outdir)
        file_list = list()
        # Maps each written file back to its pathsid for the result rows
        lig_file_dict = dict()
        # items() works on both Python 2 and Python 3 dictionaries
        for pathsid, coords in modelcoord_dict.items():
            ligandfile = os.path.join(outdir, "path_{0}.pdb".format(pathsid))
            # Atom serial and residue number both count from 1
            outlines = [
                pdb_line.format(index=index, x=x, y=y, z=z)
                for index, (x, y, z) in enumerate(coords, 1)
            ]
            with open(ligandfile, "w") as oh:
                oh.writelines(outlines)
            lig_file_dict[ligandfile] = pathsid
            file_list.append(ligandfile)
        with open(ligand_list, "w") as oh:
            oh.writelines(fn + "\n" for fn in file_list)

        logging.debug("Clustering...")
        clst_cmd = [
            self.clust_bin, "-L", ligand_list, "-c",
            str(cutoff), "-r", "0.1"
        ]
        proc = subprocess.Popen(clst_cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        ret = proc.returncode
        if ret:
            # Surface the tool's stderr; it would otherwise be lost and the
            # failure could not be diagnosed
            logging.error("Clustering stderr: %s", err)
            raise ClusterPdbError("Clustering exited %s" % ret)
        data_rows, err_lines = self.parse_cluster_out(out)
        with open(cluster_err, "w") as eh:
            eh.writelines(err_lines)
        # row['model'] must be one of the file paths written above
        for row in data_rows:
            row['pathsid'] = lig_file_dict[row['model']]

        if cleanup:
            shutil.rmtree(outdir, ignore_errors=True)
            shared.silent_remove(ligand_list)

        return data_rows
示例#6
0
    def run(cls,
            complexname,
            ligand_chain,
            ligand_sequence,
            psipred_path,
            porter_path,
            jpred_path,
            sspro_path,
            directory=None,
            nfrag=None,
            **kwargs):
        """
        Prepare the Rosetta input tree and run the fragment picker if needed.

        The expected outputs are ``output_files/<pdbid>.<nfrag>.9mers`` and
        ``output_files/<pdbid>.fsc.<nfrag>.9mers`` under ``directory``, where
        ``pdbid`` is ``complexname`` followed by ``ligand_chain``.  When
        ``shared.missing`` reports either of them as missing, the input files
        are prepared and ``fragment_picker.linuxgccrelease`` from the
        configured ``rosetta_path`` is run with ``@quota-protocol.flags``.

        :param complexname: complex identifier, first part of the pdbid
        :param ligand_chain: ligand chain, second part of the pdbid
        :param ligand_sequence: FASTA file copied to
            ``input_files/<pdbid>.fasta``
        :param psipred_path: secondary-structure file; looked up by name when
            ``"psipred"`` is listed in ``cls.ss_methods``
        :param porter_path: as ``psipred_path``, for ``"porter"``
        :param jpred_path: as ``psipred_path``, for ``"jpred"``
        :param sspro_path: as ``psipred_path``, for ``"sspro"``
        :param directory: working directory; defaults to
            ``quota<complexname>`` under ``script_dir``
        :param nfrag: fragment count used in the output file names and the
            flag template; defaults to ``cls.default_nfrag``
        :param kwargs: accepted but not used in the code shown here
        :raises RunRosettaError: if a secondary-structure file is missing,
            the fragment picker exits non-zero, or the outputs are still
            missing afterwards
        :raises subprocess.CalledProcessError: if the blast command or the
            checkpoint conversion exits non-zero
        """

        config = shared.load_config()
        make_fragments_pl = os.path.join(
            config['rosetta_path'], "tools/fragment_tools/make_fragments.pl")
        fragment_picker_exe = os.path.join(
            config['rosetta_path'],
            "main/source/bin/fragment_picker.linuxgccrelease")

        #complexname = complexname[:4]

        if directory is None:
            directory = os.path.join(script_dir,
                                     "quota{0}".format(complexname))
        if nfrag is None:
            nfrag = cls.default_nfrag

        # Absolute path so later chdir calls cannot change what it refers to
        directory = os.path.abspath(directory)
        logging.info("DIRECTORY: %s", directory)

        # Check, prepare, run Rosetta
        pdbid = "{0}{1}".format(complexname, ligand_chain)
        output_dir = os.path.join(directory, "output_files")
        fragment_name = "{0}.{1}.9mers".format(pdbid, nfrag)
        fragment_file = os.path.join(output_dir, fragment_name)
        score_name = "{0}.fsc.{1}.9mers".format(pdbid, nfrag)
        score_file = os.path.join(output_dir, score_name)
        input_dir = os.path.join(directory, "input_files")
        path_id = os.path.join(input_dir, pdbid)
        fastain = path_id + ".fasta"
        # Everything below is skipped unless an expected output is missing
        if shared.missing(fragment_file) or shared.missing(score_file):
            # Values substituted into the flag file template further down
            flag_kwargs = dict(pdbid=pdbid,
                               nfrag=nfrag,
                               rosetta_path=config['rosetta_path'])
            template_dir = os.path.join(script_dir, "rosetta_templates")
            # Create Rosetta tree
            shared.mkdir_p(output_dir)
            shared.mkdir_p(input_dir)
            # Check if ss files exist
            for method in cls.ss_methods:
                method_key = "{0}_path".format(method)
                # Fetches the "<method>_path" argument of this method by
                # name, so every entry of cls.ss_methods must match one of
                # the *_path parameters
                f = locals()[method_key]
                if shared.missing(f):
                    raise RunRosettaError("File %s not found" % f)
                else:
                    flag_kwargs[method_key] = f
            # Both template fields are always filled with empty strings here
            native_line = ""
            protocol_type = ""
            flag_kwargs['native_line'] = native_line
            flag_kwargs['protocol_type'] = protocol_type
            # Copy fasta file
            try:
                shutil.copy(ligand_sequence, fastain)
            except shutil.Error:
                # same file error
                pass
            # Relative file names below resolve against input_dir;
            # shared.CHDIR is presumably a context manager that restores the
            # previous directory on exit -- confirm in shared
            with shared.CHDIR(input_dir):
                checkpoint_file = "{0}.checkpoint".format(pdbid)
                if shared.missing(checkpoint_file):
                    chk_file = "{0}.chk".format(pdbid)
                    if shared.missing(chk_file):
                        # Run blast
                        cmd = cls.blastcmdfmt.format(id=path_id, **config)
                        # Whitespace split: paths containing spaces would be
                        # broken into several arguments
                        cmd = cmd.split()
                        subprocess.check_call(cmd)
                    # Convert to Rosetta checkpoint format
                    #subprocess.check_call([cls.convert_blast, pdbid])
                    subprocess.check_call([
                        cls.convert_blast, make_fragments_pl, fastain, chk_file
                    ])
            # Copy quota sizes
            shutil.copy(os.path.join(template_dir, "quota.def"), input_dir)
            # Copy score weights
            weights_name = "quota-protocol.wghts"
            shutil.copy(os.path.join(template_dir, weights_name), input_dir)
            # Create homolog file
            homolog_file = os.path.join(input_dir,
                                        "{0}.homolog_vall".format(pdbid))
            # The homolog file holds just the pdbid on a single line
            with open(homolog_file, "w") as oh:
                oh.write("{0}\n".format(pdbid))
            # Create flag file
            with open(
                    os.path.join(template_dir,
                                 "quota-protocol.flags.template")) as ih:
                flags_template = ih.read()
            with open(os.path.join(directory, "quota-protocol.flags"),
                      "w") as oh:
                oh.write(flags_template.format(**flag_kwargs))

            # Run rosetta
            with shared.CHDIR(directory):
                # XXX ask Steve why I need to do this now
                #cmd = "source /usr/local/bio/Modules/default/init/bash; module load rosetta; fragment_picker.linuxgccrelease @quota-protocol.flags"
                #cmd = "module load rosetta; fragment_picker.linuxgccrelease @quota-protocol.flags"
                #bash_cmd = '/bin/bash -c "{0}"'.format(cmd)
                cmd = [fragment_picker_exe, "@quota-protocol.flags"]
                # stdout and stderr both go to "<pdbid>.out" in directory
                with open("{0}.out".format(pdbid), "w") as oh:
                    #proc = subprocess.Popen(bash_cmd,
                    #shell=True,
                    #stdout=oh,
                    #stderr=oh)
                    proc = subprocess.Popen(cmd, stdout=oh, stderr=oh)
                    returncode = proc.wait()
                    if returncode:
                        raise RunRosettaError("Rosetta exited %s" % returncode)

            # Check if it's actually done
            if shared.missing(fragment_file) or shared.missing(score_file):
                raise RunRosettaError("Rosetta did not finish but exited 0")