Пример #1
0
def get_final_score(data_dir):
    '''Run the full SSIPe pipeline for the job folder "data_dir".

    1. run iAlign,     -> structure_align.out
    2. run PSI-BLAST,  -> sequence_align.out
    3. get SSIP score, -> ssip_score.txt
    4. run EvoEF,      -> evoef_score.txt  (only if force field is "SSIPE")
    5. get final score -> result.txt
    6. write index.html through make_webpage()

    "data_dir" must contain complex.pdb and mutList.txt.  The optional
    files "isscore" and "force_field" override the defaults ("0.5" and
    "SSIPE").  structure_align.out, sequence_align.out and evoef_score.txt
    are reused when they already exist in "data_dir" instead of being
    recomputed.  All external programs are run inside a scratch folder
    /tmp/$USER/<basename of data_dir>, which is wiped at start and removed
    again on success.  Returns None.
    '''
    #### make temporary directory ####
    # NOTE: requires the USER environment variable; os.getenv() returns
    # None when it is unset and the concatenation below then fails.
    tmp_dir = "/tmp/" + os.getenv("USER") + "/" + os.path.basename(data_dir)
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    #### parse input files ####
    isscore = "0.5"
    force_field = "SSIPE"  # SSIPe force field by default

    isscore_file = os.path.join(data_dir, "isscore")
    force_field_file = os.path.join(data_dir, "force_field")
    pdb_file = os.path.join(data_dir, "complex.pdb")
    mutlist_file = os.path.join(data_dir, "mutList.txt")
    align_file1 = os.path.join(data_dir, "structure_align.out")
    align_file2 = os.path.join(data_dir, "sequence_align.out")
    ssipscore_file = os.path.join(data_dir, "ssip_score.txt")
    evoefscore_file = os.path.join(data_dir, "evoef_score.txt")
    result_file = os.path.join(data_dir, "result.txt")

    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python >= 3.11,
    # and every read below goes through strip()/splitlines(), which already
    # cope with any line-ending convention.
    if os.path.isfile(isscore_file):
        with open(isscore_file, 'r') as fp:
            isscore = fp.read().strip()
    if os.path.isfile(force_field_file):
        with open(force_field_file, 'r') as fp:
            force_field = fp.read().strip()

    #### define temporary files ####
    pdb_tmp = os.path.join(tmp_dir, "complex.pdb")
    align_tmp1 = os.path.join(tmp_dir, "structure_align.out")
    align_tmp2 = os.path.join(tmp_dir, "sequence_align.out")
    mutlist_tmp = os.path.join(tmp_dir, "mutList.txt")
    ssipscore_tmp = os.path.join(tmp_dir, "ssip_score.txt")
    evoefscore_tmp = os.path.join(tmp_dir, "evoef_score.txt")

    #### copy the input files to temporary folder ####
    shutil.copy(mutlist_file, mutlist_tmp)
    shutil.copy(pdb_file, pdb_tmp)

    # NOTE(review): every external command below is assembled as a shell
    # string for os.system(); paths containing spaces or shell
    # metacharacters are not quoted.

    #### interface alignment ####
    sys.stdout.write("step 1: run iAlign\n")
    if os.path.isfile(align_file1):
        shutil.copy(align_file1, align_tmp1)
    else:
        # clamp the requested cutoff into [0.45, 0.55]; the clamped value
        # is what gets passed on to the later steps and the web page
        newscore = min(max(float(isscore), 0.45), 0.55)
        isscore = str(newscore)
        cmd = ' '.join([
            "cd", tmp_dir, ';', run_align, pdb_tmp, isscore, align_tmp1
        ])
        os.system(cmd)
        shutil.copy(align_tmp1, align_file1)

    #### fix insertion code in align.out ####
    with open(align_tmp1, 'r') as fp:
        lines = fp.read().splitlines()
    insert_code_pattern = re.compile(r"\d+[A-Za-z]")
    fixed_lines = []
    for line in lines:
        # only "residue_" lines that contain a digit+letter token are edited
        if not line.startswith("residue_") or \
                not insert_code_pattern.search(line):
            fixed_lines.append(line)
            continue
        pieces = []
        for resi in line.split(' '):
            if resi == '':
                # an empty token stands for one of several consecutive spaces
                pieces.append(' ')
            elif not insert_code_pattern.match(resi):
                pieces.append(resi)
            else:
                # drop the trailing insertion-code letter and put one space
                # in front of the token instead
                pieces.append(' ' + resi[:-1])
        fixed_lines.append(''.join(pieces))
    with open(align_tmp1, 'w') as fp:
        fp.write(''.join([line + '\n' for line in fixed_lines]))

    #### run psiblast to get sequence interface alignment ####
    sys.stdout.write("step 2: run psiblast\n")
    if os.path.isfile(align_tmp1):
        if os.path.isfile(align_file2):
            shutil.copy(align_file2, align_tmp2)
        else:
            cmd = ' '.join([
                "cd", tmp_dir, ";", run_seqalign, pdb_tmp, align_tmp1,
                align_tmp2
            ])
            os.system(cmd)
            shutil.copy(align_tmp2, align_file2)
    else:
        sys.stdout.write(
            "step 2: skip psiblast because iAlign alignment file doesn't exist\n"
        )

    #### get SSIP score ####
    sys.stdout.write("step 3: get_ssip_score\n")
    if os.path.isfile(align_tmp1) or os.path.isfile(align_tmp2):
        cmd = ' '.join([
            "cd", tmp_dir, ';', get_ssip_score, mutlist_tmp, align_tmp1,
            align_tmp2, isscore, ssipscore_tmp
        ])
        os.system(cmd)
        shutil.copy(ssipscore_tmp, ssipscore_file)
        #copy the sorted file back to the data_dir
        shutil.copy(align_tmp1, align_file1)
        shutil.copy(align_tmp2, align_file2)
    else:
        sys.stdout.write(
            "step 3: skip get_ssip_score because structure_align.out and sequence_align.out don't exist\n"
        )

    #### get evoef score ####
    sys.stdout.write("step 4: get_evoef_score\n")
    if force_field == "SSIPE":
        if os.path.isfile(evoefscore_file):
            shutil.copy(evoefscore_file, evoefscore_tmp)
        else:
            cmd = ' '.join([
                "cd", tmp_dir, ';', run_evoef, pdb_tmp, mutlist_tmp,
                evoefscore_tmp
            ])
            os.system(cmd)
            shutil.copy(evoefscore_tmp, evoefscore_file)

    #### get final score ####
    sys.stdout.write("step 5: get_final_score\n")
    with open(mutlist_file, 'r') as fp:
        mutation_list = fp.read().strip().splitlines()
    with open(ssipscore_tmp, 'r') as fp:
        ssipscore_list = [float(l) for l in fp.read().splitlines()]
    if os.path.isfile(evoefscore_tmp):
        with open(evoefscore_tmp, 'r') as fp:
            evoefscore_list = [float(f) for f in fp.read().splitlines()]
        finalscore_list = []
        for x, f, _m in zip(ssipscore_list, evoefscore_list, mutation_list):
            if x == 0. and f == 0.:
                # both terms are zero: report 0 rather than the bare offset
                finalscore_list.append(0.)
            else:
                finalscore_list.append(0.734 * x + 0.341 * f + 0.205)
    else:
        # No EvoEF scores available: the SSIP score alone is the final
        # score.  evoefscore_list still has to exist because result_list
        # below zips over it; the EvoEF column is reported as zero.
        finalscore_list = ssipscore_list
        evoefscore_list = [0.] * len(ssipscore_list)
    result_list = [
        "%-7.3f %-9.3f %-10.3f %s" % (f, s, e, m) for f, s, e, m in zip(
            finalscore_list, ssipscore_list, evoefscore_list, mutation_list)
    ]
    with open(result_file, 'w') as fp:
        fp.write('DDG     SSIPscore EvoEFscore mutations\n')
        fp.write('\n'.join(result_list) + '\n')

    #### copy mutant PDB structure if available ####
    for mutation in mutation_list:
        mutant_name = "complex_" + mutation.rstrip(';').replace(
            ',', '_') + ".pdb"
        mutant_tmp = os.path.join(tmp_dir, mutant_name)
        if os.path.isfile(mutant_tmp):
            shutil.copy(mutant_tmp, os.path.join(data_dir, mutant_name))
    cmd = ' '.join([
        "cd", tmp_dir, ";",
        "tar -jcvf mutation_structures.tar.bz2 complex_*.pdb"
    ])
    os.system(cmd)
    shutil.copy(os.path.join(tmp_dir, "mutation_structures.tar.bz2"),
                os.path.join(data_dir, "mutation_structures.tar.bz2"))

    #get chain list
    header_list, _sequence_list = pdb2seq(pdb_file)
    chain_list = [h.split(':')[-1] for h in header_list]
    #### extract interface PDB file ####
    # only the first two chains of complex.pdb are passed to the extractor
    cmd = ' '.join([
        extint, "-s", pdb_file, "-r", chain_list[0], "-l", chain_list[1], "-c",
        os.path.join(data_dir, "contact.list"), "-i",
        os.path.join(data_dir, "interface.pdb")
    ])
    os.system(cmd)

    #### make html output ####
    sys.stdout.write("step 6: index.html\n")
    make_webpage(data_dir, isscore, force_field, result_list)

    #### clean up temporary directory ####
    shutil.rmtree(tmp_dir)
    return
Пример #2
0
def make_webpage(data_dir, isscore, force_field, result_list):
    '''Make index.html for data folder "data_dir".

    data_dir    - job folder; its basename is shown as the job id.  It must
                  contain structure_align.out and sequence_align.out.
                  seq.fasta is read when present; otherwise it is created
                  from complex.pdb through pdb2seq().
    isscore     - IS-score cutoff, inserted into the page as given
    force_field - force field name, inserted into the page as given
    result_list - list of preformatted result lines; they are joined with
                  "<br>" and shown under the results header

    The page also uses the module-level names http_link, javascript_list
    and citation.  Only the first two chains/sequences are displayed.
    Highlighting of mutated residues in JSmol is switched off, so the
    $JSMOL_CMD placeholder is always filled with an empty string.
    '''
    html_template = Template("""<html>
<head><meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>SSIPe job finished</title></head>
<body>
[<a href="$HTTP_LINK">back to SSIPe server</a>]<br/>
<h1 align="center">SSIPe job id $JOBID</h1>

<div style="background:#6599FF;width:350px;">
<b>User input parameters and structure</b></div>
<ul>

<li>Interface similarity cutoff<br>
<font face="Courier">IS-score = $ISSCORE</font>
<br>

<li>Force field used for &Delta;&Delta;G prediction<br>
<font face="Courier">$FF</font>

<li>Sequence [<a href=seq.fasta>download</a>]<br>
<font face="Courier">
&gt;$CHAIN0<br>$SEQUENCE0<br>&gt;$CHAIN1<br>$SEQUENCE1<br></font>
<br>

<li>View of 3D structure structure
<script type="text/javascript" src="$JSMOL0"></script>
<script type="text/javascript" src="$JSMOL1"></script>
<table>
    <td align ="center">
    complex structure [<a href=complex.pdb>download</a>]
    <script type="text/javascript">
    jmolInitialize("$JSMOL_HOME/");
    jmolSetAppletColor("#000000");
    jmolApplet(300,"load complex.pdb; set frank off; calculate structure rama; select all; color chain; spacefill off; wireframe off; cartoons; set ambient 40; set spin x 10; set spin y 10; spin off; $JSMOL_CMD");
    </script>
    </td>

    <td align ="center">
    interface structure [<a href=interface.pdb>download</a>]
    <script type="text/javascript">
    jmolInitialize("$JSMOL_HOME/");
    jmolSetAppletColor("#000000");
    jmolApplet(300,"load interface.pdb; select all; color chain; set ambient 40; set spin x 10; set spin y 10; spin off; $JSMOL_CMD");
    </script>
    </td>
</table>
</ul>

<div style="background:#6599FF;width:150px;">
<b>Results</b></div>
<ul>
<li>Binding affinity change upon mutations [<a href=result.txt>download</a>] [<a href=$HTTP_LINK/help.html#ddG>Explanation</a>]<br>
<font face="Courier">
DDG&nbsp;&nbsp;&nbsp;&nbsp; SSIPscore EvoEFscore mutations<br>
$RESULTS<br>
</font>
<br>

<li>Download structure models of all mutants [<a href=mutation_structures.tar.bz2>download</a>]
[<a href=$HTTP_LINK/help.html#forcefield>Explanation</a>]<br>
<br><br>

<li>Structure-based interface MSA by iAlign from NIL [<a href=structure_align.out>download</a>]
[<a href=$HTTP_LINK/help.html#structure_alignment>Explanation</a>]<br>
<font face="Courier">$ALIGN</font><br><br>

<li>Sequence-based interface MSA by PSI-BLAST from STRING [<a href=sequence_align.out>download</a>]
[<a href=$HTTP_LINK/help.html#sequence_alignment>Explanation</a>]<br>
<font face="Courier">$ALIGN2</font><br><br>

</ul>
<hr>
<b>Reference:</b>
<ul>$CITATION</ul>
[<a href="$HTTP_LINK">back to SSIPe server</a>]<br/>
</body></html>""")
    # $HTTP_LINK  - HTTP url to SSIPe web portal
    # $JSMOL0     - first javascript for JSmol
    # $JSMOL1     - second javascript for JSmol
    # $JSMOL_HOME - JSmol javascript home
    # $JSMOL_CMD  - extra JSmol commands (currently always empty)
    # $CITATION   - citation for SSIPe paper
    # $JOBID      - webserver jobID
    # $ALIGN      - structure-based interface alignment
    # $ALIGN2     - sequence-based interface alignment
    # $RESULTS    - result list
    # $ISSCORE    - IS-score cutoff for interface similarity
    # $FF         - interface profile (and physics potential)
    # $CHAIN0     - chain 0
    # $SEQUENCE0  - sequence 0
    # $CHAIN1     - chain 1
    # $SEQUENCE1  - sequence 1

    #### read seq.fasta ####
    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python >= 3.11
    # and splitlines() already handles every line-ending convention.
    fasta_file = os.path.join(data_dir, "seq.fasta")
    if os.path.isfile(fasta_file):
        with open(fasta_file, 'r') as fp:
            txt = fp.read()
        chain_list = []
        sequence_list = []
        for block in txt.split('>'):
            if block.strip():
                block = block.splitlines()
                # first line is the header, the rest is the wrapped sequence
                chain_list.append(block[0])
                sequence_list.append(''.join(block[1:]))
    else:
        # no FASTA file yet: derive it from the complex and save it so the
        # "download" link on the page works
        header_list, sequence_list = pdb2seq(
            os.path.join(data_dir, "complex.pdb"))
        chain_list = [h.split(':')[-1] for h in header_list]
        with open(fasta_file, 'w') as fp:
            fp.write(''.join([
                '>' + h + '\n' + s + '\n'
                for h, s in zip(header_list, sequence_list)
            ]))

    #### read structure_align.out and sequence_align.out ####
    align_txt = _alignment_to_html(
        os.path.join(data_dir, "structure_align.out"))
    align_txt2 = _alignment_to_html(
        os.path.join(data_dir, "sequence_align.out"))

    #don't label the mutated residues to make webpage faster
    jsmol_cmd = ''

    #### write index.html ####
    txt = html_template.substitute(
        HTTP_LINK=http_link,
        JSMOL0=javascript_list[0],
        JSMOL1=javascript_list[1],
        JSMOL_HOME=os.path.dirname(javascript_list[1]),
        JSMOL_CMD=jsmol_cmd,
        CITATION=citation,
        JOBID=os.path.basename(data_dir),
        ALIGN=align_txt,
        ALIGN2=align_txt2,
        RESULTS="<br>".join(result_list),
        ISSCORE=isscore,
        FF=force_field,
        CHAIN0=chain_list[0],
        SEQUENCE0="<br>".join(textwrap.wrap(sequence_list[0], 60)),
        CHAIN1=chain_list[1],
        SEQUENCE1="<br>".join(textwrap.wrap(sequence_list[1], 60)),
    )
    with open(os.path.join(data_dir, "index.html"), 'w') as fp:
        fp.write(txt)


def _alignment_to_html(align_path):
    '''Return the alignment file "align_path" as HTML text.

    Every occurrence of "query" is replaced by "target" and each line is
    terminated with "<br>".
    '''
    with open(align_path, 'r') as fp:
        txt = fp.read().replace('query', 'target')
    return ''.join([line + '<br>' for line in txt.splitlines()])
Пример #3
0
def get_final_score3(data_dir):
    '''Run the EvoEF-only pipeline for the job folder "data_dir".

    4. run EvoEF,      -> evoef_score.txt
    5. get final score -> result.txt
    6. write index.html through make_webpage3()

    "data_dir" must contain complex.pdb and mutList.txt.  The optional
    files "isscore" and "force_field" override the defaults ("0.5" and
    "EVOEF").  evoef_score.txt is reused when it already exists in
    "data_dir".  EvoEF is run inside a scratch folder
    /tmp/$USER/<basename of data_dir>, which is wiped at start and removed
    again on success.  The final score is the EvoEF score itself.
    Returns None.
    '''
    #### make temporary directory ####
    # NOTE: requires the USER environment variable; os.getenv() returns
    # None when it is unset and the concatenation below then fails.
    tmp_dir = "/tmp/" + os.getenv("USER") + "/" + os.path.basename(data_dir)
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    #### parse input files ####
    isscore = "0.5"
    force_field = "EVOEF"

    isscore_file = os.path.join(data_dir, "isscore")
    force_field_file = os.path.join(data_dir, "force_field")
    pdb_file = os.path.join(data_dir, "complex.pdb")
    mutlist_file = os.path.join(data_dir, "mutList.txt")
    evoefscore_file = os.path.join(data_dir, "evoef_score.txt")
    result_file = os.path.join(data_dir, "result.txt")

    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python >= 3.11,
    # and every read below goes through strip()/splitlines(), which already
    # cope with any line-ending convention.
    if os.path.isfile(isscore_file):
        with open(isscore_file, 'r') as fp:
            isscore = fp.read().strip()
    if os.path.isfile(force_field_file):
        with open(force_field_file, 'r') as fp:
            force_field = fp.read().strip()

    #### define temporary files ####
    pdb_tmp = os.path.join(tmp_dir, "complex.pdb")
    mutlist_tmp = os.path.join(tmp_dir, "mutList.txt")
    evoefscore_tmp = os.path.join(tmp_dir, "evoef_score.txt")

    #### copy the input files to temporary folder ####
    shutil.copy(mutlist_file, mutlist_tmp)
    shutil.copy(pdb_file, pdb_tmp)

    # NOTE(review): every external command below is assembled as a shell
    # string for os.system(); paths containing spaces or shell
    # metacharacters are not quoted.

    #### get evoef score ####
    sys.stdout.write("step 4: get_evoef_score\n")
    # NOTE(review): if the force_field file holds anything other than
    # "EVOEF" this step is skipped and reading evoefscore_tmp below fails
    # because the file was never created - confirm whether that is intended.
    if force_field == "EVOEF":
        if os.path.isfile(evoefscore_file):
            shutil.copy(evoefscore_file, evoefscore_tmp)
        else:
            cmd = ' '.join([
                "cd", tmp_dir, ';', run_evoef, pdb_tmp, mutlist_tmp,
                evoefscore_tmp
            ])
            os.system(cmd)
            shutil.copy(evoefscore_tmp, evoefscore_file)

    #### get final score ####
    sys.stdout.write("step 5: get_final_score\n")
    with open(mutlist_file, 'r') as fp:
        mutation_list = fp.read().strip().splitlines()
    with open(evoefscore_tmp, 'r') as fp:
        evoefscore_list = [float(f) for f in fp.read().splitlines()]
    # the DDG column and the EvoEFscore column carry the same value here
    result_list = [
        "%-7.3f %-10.3f %s" % (e, e, m)
        for e, m in zip(evoefscore_list, mutation_list)
    ]
    with open(result_file, 'w') as fp:
        fp.write('DDG     EvoEFscore mutations\n')
        fp.write('\n'.join(result_list) + '\n')

    #### copy mutant PDB structure if available ####
    for mutation in mutation_list:
        mutant_name = "complex_" + mutation.rstrip(';').replace(
            ',', '_') + ".pdb"
        mutant_tmp = os.path.join(tmp_dir, mutant_name)
        if os.path.isfile(mutant_tmp):
            shutil.copy(mutant_tmp, os.path.join(data_dir, mutant_name))
    cmd = ' '.join([
        "cd", tmp_dir, ";",
        "tar -jcvf mutation_structures.tar.bz2 complex_*.pdb"
    ])
    os.system(cmd)
    shutil.copy(os.path.join(tmp_dir, "mutation_structures.tar.bz2"),
                os.path.join(data_dir, "mutation_structures.tar.bz2"))

    #get chain list
    header_list, _sequence_list = pdb2seq(pdb_file)
    chain_list = [h.split(':')[-1] for h in header_list]
    #### extract interface PDB file ####
    # only the first two chains of complex.pdb are passed to the extractor
    cmd = ' '.join([
        extint, "-s", pdb_file, "-r", chain_list[0], "-l", chain_list[1], "-c",
        os.path.join(data_dir, "contact.list"), "-i",
        os.path.join(data_dir, "interface.pdb")
    ])
    os.system(cmd)

    #### make html output ####
    sys.stdout.write("step 6: index.html\n")
    make_webpage3(data_dir, isscore, force_field, result_list)

    #### clean up temporary directory ####
    shutil.rmtree(tmp_dir)
    return