Exemplo n.º 1
0
def _record_large_structure(structure_file, subfolder, chain_correspondences,
                            verbosity):
    '''
    Best-effort recording of the sequences of a structure that is too large
    (file size or chain count) to go through the full assessment.

    structure_file: path handed to pctools.parse_pdb_structure.
    subfolder: archive subdirectory name, forwarded to record_fasta.
    chain_correspondences: dict updated in place with the entry's result
        from pctools.split_states.
    verbosity: forwarded to pctools.printv.

    Ordinary failures are reported through pctools.printv and swallowed.
    KeyboardInterrupt/SystemExit are deliberately NOT swallowed.
    '''
    try:
        pdb_code, structure, nchains = pctools.parse_pdb_structure(
            structure_file)
        structure, chain_correspondences[pdb_code] = pctools.split_states(
            structure)
        nchainspostsplit, seqs, chain_ids = pctools.extract_seqs(structure, 0)
        pctools.printv(
            clrs['y'] + "Recording large-pdb sequence" + clrs['n'], verbosity)
        # Write in fasta file
        record_fasta(pdb_code, seqs, chain_ids, subfolder, type='largepdb')
    except Exception:
        pctools.printv(
            clrs['r'] + "Failed to fetch sequence!" + clrs['n'], verbosity)


def _store_homo_oligomer(pdb_code, structure, nchains, author, software, seqs,
                         chain_ids, subfolder, log_file, verbosity):
    '''
    Saves an accepted structure under pdb_homo_archive/<subfolder>, gzips it,
    appends one line to log_file and records its sequences in the fasta file.

    NOTE(review): the runtime literal 'h**o' (record_fasta type and messages)
    looks like a masked 'homo'. It is kept byte-for-byte because the values
    accepted by record_fasta are not visible from here -- confirm.
    '''
    pctools.printv(clrs['y'] + "FETCHING" + clrs['n'] + ".\n", verbosity)
    # Write file to database
    target_dir = os.path.join(pdb_homo_archive, subfolder)
    newfile = os.path.join(target_dir, pdb_code + ".pdb")
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)
    io.set_structure(structure)
    io.save(newfile)
    pctools.gzip_pdb(newfile)
    # Write to log file
    with open(log_file, 'a') as f:
        f.write(
            str(pdb_code) + "," + str(nchains) + "," + '/'.join(author) +
            "," + '/'.join(software) + "," +
            str(os.path.getctime(newfile + '.gz')) + '\n')
    # Write in fasta file
    pctools.printv(
        clrs['y'] + "Recording h**o-oligomer sequence." + clrs['n'],
        verbosity)
    record_fasta(pdb_code, seqs, chain_ids, subfolder, type='h**o')


def curate_homoDB(verbosity):
    '''
    Creates the homo-oligomeric database from a local pdb repository.
    The divided scheme adopted by RCSB, in which the subdirectories
    are the two middle characters in the PDB code, is assumed.

    Every run writes timestamped files into <pdb_homo_archive>/stats:
    * <timestamp>-choirdb.dat lists every assessed file name followed by its
      assessment time (entries from the previous assessment that are not
      being re-processed are carried over).
    * <timestamp>-choirdb.log has one comma-separated line per structure
      stored in the database (code, chains, author, software, date).
    * <timestamp>-choirdb.err lists the files whose assessment raised an
      error, or a success message when there were none.
    Sequences are recorded through record_fasta(), and the chain
    correspondences returned by pctools.split_states are persisted in
    stats/chain_correspondences.pickle once the whole loop has finished.

    verbosity: verbosity level forwarded to pctools.printv and
        list_new_files.

    Errors raised while assessing one entry are logged and the loop moves on;
    a KeyboardInterrupt raised during the assessment terminates the program.

    Called by: update_databases()
    '''
    # Create stats folder if does not exist
    stats_dir = os.path.join(pdb_homo_archive, 'stats')
    if not os.path.isdir(stats_dir):
        os.mkdir(stats_dir)
    # Compare latest assession with new files
    assession_log = read_latest_assession(stats_dir)
    new_files = list_new_files(pdb1_archive, assession_log, verbosity)
    print(clrs['g'] + str(len(new_files)) + clrs['n'] +
          ' new structure files were found and will be processed')
    now = time.strftime("%d-%m-%Y@%H.%M.%S")
    dat_file = os.path.join(stats_dir, now + '-choirdb.dat')
    log_file = os.path.join(stats_dir, now + '-choirdb.log')
    err_file = os.path.join(stats_dir, now + '-choirdb.err')
    # Write files not to be updated to new dat file. Append mode creates the
    # file when needed; the set makes each membership test O(1).
    pending = set(new_files)
    with open(dat_file, 'a') as f:
        for entry in assession_log:
            if entry not in pending:
                f.write(entry + " " + assession_log[entry] + "\n")
    # Create log file
    if not os.path.isfile(log_file):
        with open(log_file, 'w+') as f:
            f.write('Code, Chains, Author, Software, Date\n')

    # Read Chain correspondences (local state written by a previous run of
    # this function; never point this at a pickle from an untrusted source)
    chain_correspondences_file = os.path.join(stats_dir,
                                              'chain_correspondences.pickle')
    if os.path.isfile(chain_correspondences_file):
        with open(chain_correspondences_file, 'rb') as p:
            chain_correspondences = pickle.load(p)
    else:
        chain_correspondences = {}

    # Main loop that will populate the ProtCHOIR database
    for pdb in pg(new_files, widgets=widgets):
        path_parts = pdb.split('/')
        filename = path_parts[-1]
        subfolder = path_parts[-2]
        # Record assessment in dat file
        with open(dat_file, 'a') as f:
            f.write(filename + " " + str(time.time()) + '\n')
        # Start assession
        pctools.printv('\nAssessing ' + pdb + '...', verbosity)
        # Files larger than 2 Mb are not assessed; only the sequences of the
        # corresponding asymmetric unit are recorded
        file_size = os.stat(pdb).st_size / 1048576
        pctools.printv(
            'File size: ' + clrs['c'] + '{0:.1g}'.format(file_size) + ' Mb' +
            clrs['n'], verbosity)
        if file_size > 2:
            pctools.printv(clrs['r'] + "File size too large!" + clrs['n'],
                           verbosity)
            pctools.printv(
                clrs['y'] +
                "Will try to fetch sequences from asymmetric unit." +
                clrs['n'], verbosity)
            alternative_pdb = os.path.join(
                pdb_archive, subfolder,
                'pdb' + filename.split('.')[0] + '.ent.gz')
            _record_large_structure(alternative_pdb, subfolder,
                                    chain_correspondences, verbosity)
            continue

        try:
            pdb_code, structure, nchains = pctools.parse_pdb_structure(pdb)
            pctools.printv(
                'Number of chains in structure ' + clrs['y'] + pdb_code +
                clrs['n'] + ': ' + str(nchains), verbosity)
            # Structures with more than 60 chains are not assessed; only
            # their sequences are recorded
            if int(nchains) > 60:
                pctools.printv(
                    "Number of chains (" + clrs['y'] + str(nchains) +
                    clrs['n'] + ") larger than 60! " + clrs['r'] +
                    "Too many chains!" + clrs['n'], verbosity)
                pctools.printv(
                    clrs['y'] + "Will try to fetch sequences anyway." +
                    clrs['n'], verbosity)
                _record_large_structure(pdb, subfolder, chain_correspondences,
                                        verbosity)
                continue

            structure, chain_correspondences[pdb_code] = pctools.split_states(
                structure)
            nchainspostsplit, seqs, chain_ids = pctools.extract_seqs(
                structure, 0)
            pctools.printv(
                'Number of chains (' + clrs['c'] + str(nchains) + clrs['n'] +
                ') and file size (' + clrs['c'] + str(file_size) + clrs['n'] +
                ') OK.' + clrs['g'] + ' Proceeding.' + clrs['n'] + '\n',
                verbosity)
            # Try to get info from the canonic pdb header (homonymous to pdb1)
            canonpdb = "pdb" + pdb_code + ".ent.gz"
            try:
                contents = pctools.parse_pdb_contents(
                    os.path.join(pdb_archive, subfolder, canonpdb))[1]
            except Exception:
                pctools.printv(
                    clrs['r'] +
                    '\n\n Mismatch between pdb and biounit entries...' +
                    clrs['n'], verbosity)
                # Without the header there are no annotations for this entry.
                # Carrying on would either hit an unbound name or silently
                # reuse the header of the previously processed structure, so
                # hand the failure to the per-entry error handler below.
                raise
            author, software = pctools.get_annotated_states(contents)
            pctools.printv(
                'Author determined biological unit = ' + str(author),
                verbosity)
            pctools.printv(
                'Software determined quaternary structure= ' + str(software),
                verbosity)
            # Start assessing sequences and structures (from 2 up to 60 chains)
            if 1 < int(nchains) < 61:
                ids, proteinpair = pctools.get_pairwise_ids(seqs, nchains)
                for identity in ids:
                    # NOTE(review): exactly 90% is shown in green here, while
                    # the selection tests below require strictly more than 90
                    if identity[0] >= 90:
                        color = clrs['g']
                    else:
                        color = clrs['r']
                    pctools.printv(
                        'Identity between chains ' + clrs['y'] +
                        str(identity[1]) + clrs['n'] + ' and ' + clrs['y'] +
                        str(identity[2]) + clrs['n'] + ' is ' + color +
                        str(identity[0]) + "%" + clrs['n'] + ".", verbosity)
                # Save records for pure homo-oligomers
                if all(identity[0] > 90
                       for identity in ids) and proteinpair is True:
                    pctools.printv(
                        "All identities over 90%. Likely " + clrs['b'] +
                        "h**o-oligomeric" + clrs['n'] + ".", verbosity)
                    _store_homo_oligomer(pdb_code, structure, nchains, author,
                                         software, seqs, chain_ids, subfolder,
                                         log_file, verbosity)

                # Investigate partial homo-oligomers
                elif any(identity[0] > 90
                         for identity in ids) and proteinpair is True:
                    at_least_one_interface = False
                    for identity in ids:
                        # Check if similar chains share interfaces; the first
                        # pair that does is enough to keep the structure
                        if identity[0] > 90 and pctools.check_interfaces(
                                structure, identity[1], identity[2]):
                            at_least_one_interface = True
                            pctools.printv(
                                'Contacts found between chains ' + clrs['g'] +
                                str(identity[1]) + clrs['n'] + ' and ' +
                                clrs['g'] + str(identity[2]) + clrs['n'] +
                                ' sharing ' + clrs['g'] + str(identity[0]) +
                                clrs['n'] + " % identity.", verbosity)
                            pctools.printv(
                                "At least one putative " + clrs['b'] +
                                "h**o-oligomeric " + clrs['n'] +
                                "interface found.", verbosity)
                            _store_homo_oligomer(pdb_code, structure, nchains,
                                                 author, software, seqs,
                                                 chain_ids, subfolder,
                                                 log_file, verbosity)
                            break
                    if at_least_one_interface is False:
                        pctools.printv(
                            "No h**o-oligomeric interface found. Likely " +
                            clrs['r'] + "hetero-oligomeric" + clrs['n'] + ".",
                            verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code,
                                     seqs,
                                     chain_ids,
                                     subfolder,
                                     type='hetero')

                elif proteinpair is False:
                    pctools.printv(
                        clrs['r'] + "No proteic chain pairs found" +
                        clrs['n'] + ".", verbosity)
                    # A sequence made only of 'X' is not counted as protein
                    if any(set(seq[1]) != {'X'} for seq in seqs):
                        pctools.printv(
                            clrs['y'] + "Protein sequences found though" +
                            clrs['n'], verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code,
                                     seqs,
                                     chain_ids,
                                     subfolder,
                                     type='hetero')
                    else:
                        pctools.printv(
                            clrs['r'] +
                            "Not even a single protein chain. Disregarding." +
                            clrs['n'], verbosity)

                else:
                    pctools.printv(
                        "No similar chains found. Likely " + clrs['r'] +
                        "hetero-oligomeric" + clrs['n'] + ".", verbosity)
                    pctools.printv(
                        clrs['y'] + "Recording hetero-oligomer sequence" +
                        clrs['n'], verbosity)
                    record_fasta(pdb_code,
                                 seqs,
                                 chain_ids,
                                 subfolder,
                                 type='hetero')

            elif int(nchains) == 1:
                pctools.printv(
                    "Only one chain found. Likely " + clrs['r'] + "monomeric" +
                    clrs['n'] + ".", verbosity)
                pctools.printv(
                    clrs['y'] + "Recording monomer sequence." + clrs['n'],
                    verbosity)
                # NOTE(review): the structure was already split and its
                # sequences extracted above; this second pass overwrites
                # chain_correspondences[pdb_code] -- confirm it is intended
                structure, chain_correspondences[
                    pdb_code] = pctools.split_states(structure)
                nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
                record_fasta(pdb_code, seqs, chain_ids, subfolder, type='mono')

        except KeyboardInterrupt:
            # User abort: show where the assessment was and stop the program
            traceback.print_exception(*sys.exc_info())
            sys.exit()
        except Exception:
            errtype, errvalue, errtraceback = sys.exc_info()
            errtypeshort = str(errtype).split('\'')[1]
            pctools.printv(
                clrs['r'] + '*' + str(errtypeshort) + ': ' + str(errvalue) +
                ' l.' + str(errtraceback.tb_lineno) + '*' + clrs['n'],
                verbosity)
            traceback.print_exception(*sys.exc_info())
            # Append mode creates the error file on first use
            with open(err_file, 'a') as f:
                f.write(filename + '\n')
            continue

    with open(chain_correspondences_file, 'wb') as p:
        pickle.dump(chain_correspondences, p, protocol=pickle.HIGHEST_PROTOCOL)

    # No error file at this point means no entry failed
    if not os.path.isfile(err_file):
        with open(err_file, 'w+') as f:
            f.write('\nNo errors. Assessment terminated succesfully.\n')
Exemplo n.º 2
0
def analyse_model(oligomer):
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)
    pdb_name, structure, nchains = pctools.parse_any_structure(oligomer)
    nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
    relevant_chains = []
    for seq in seqs:
        relevant_chains.append(seq[0])

    pisa_output, pisa_error, protomer_data = pctools.run_pisa(
        oligomer,
        '',
        g_args.verbosity,
        gen_monomer_data=True,
        gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    model_report['assemblied_protomer_plot'], model_report[
        'assemblied_protomer_exposed_area'], model_report[
            'assemblied_protomer_hydrophobic_area'], model_report[
                'assemblied_protomer_conserved_area'], minx, maxx, analysis_output = pctools.plot_analysis(
                    pdb_name,
                    protomer_surface_residues,
                    g_entropies,
                    g_z_entropies,
                    g_tmdata,
                    g_args,
                    minx=g_minx,
                    maxx=g_maxx)
    output.append(analysis_output)

    if 'I' in g_args.assessment and not g_args.allow_monomers:
        output.append(
            pctools.subsection('3' + '[I]', 'Interfaces Comparison: ' +
                               model_oligomer_name))
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            model_report['exposed_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_exposed_area']) -
                 float(model_report['protomer_exposed_area'])) /
                float(model_report['protomer_exposed_area']))
            model_report['hydrophobic_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_hydrophobic_area']) -
                 float(model_report['protomer_hydrophobic_area'])) /
                float(model_report['protomer_hydrophobic_area']))
            model_report['conserved_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_conserved_area']) -
                 float(model_report['protomer_conserved_area'])) /
                float(model_report['protomer_conserved_area']))

            if model_report['exposed_area_reduction'] < -5:
                if model_report['hydrophobic_area_reduction'] < 0:
                    hydophobic_surface_score = 10 * (
                        model_report['hydrophobic_area_reduction'] /
                        model_report['exposed_area_reduction']) / 3
                else:
                    hydophobic_surface_score = 0
                if hydophobic_surface_score > 10:
                    hydophobic_surface_score = 10
                output.append('Hydrophobic surface score: ' +
                              str(hydophobic_surface_score))
                if model_report['conserved_area_reduction'] < 0:
                    conserved_surface_score = 10 * (
                        model_report['conserved_area_reduction'] /
                        model_report['exposed_area_reduction']) / 3
                else:
                    conserved_surface_score = 0
                if conserved_surface_score > 10:
                    conserved_surface_score = 10
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    if set(model_interface['chains']) == set(
                            template_interface['chains']):
                        comparison_data = {}
                        denominator = 12
                        delta_area = round(
                            model_interface['interface area'] -
                            template_interface['interface area'], 2)
                        comparison_data['model area'] = model_interface[
                            'interface area']
                        comparison_data['template area'] = template_interface[
                            'interface area']
                        comparison_data['delta area'] = delta_area
                        delta_energy = round(
                            model_interface['interface solvation energy'] -
                            template_interface['interface solvation energy'],
                            2)
                        comparison_data['model energy'] = model_interface[
                            'interface solvation energy']
                        comparison_data[
                            'template energy'] = template_interface[
                                'interface solvation energy']
                        comparison_data['delta energy'] = delta_energy
                        delta_hb = round(
                            model_interface['hydrogen bonds'] -
                            template_interface['hydrogen bonds'], 2)
                        comparison_data['model hb'] = model_interface[
                            'hydrogen bonds']
                        comparison_data['template hb'] = template_interface[
                            'hydrogen bonds']
                        comparison_data['delta hb'] = delta_hb
                        delta_sb = round(
                            model_interface['salt bridges'] -
                            template_interface['salt bridges'], 2)
                        comparison_data['model sb'] = model_interface[
                            'salt bridges']
                        comparison_data['template sb'] = template_interface[
                            'salt bridges']
                        comparison_data['delta sb'] = delta_sb
                        delta_ss = round(
                            model_interface['disulphide bridges'] -
                            template_interface['disulphide bridges'], 2)
                        comparison_data['model ss'] = model_interface[
                            'disulphide bridges']
                        comparison_data['template ss'] = template_interface[
                            'disulphide bridges']
                        comparison_data['delta ss'] = delta_ss

                        output.append(clrs['y'] + 'INTERFACES COMPARISON' +
                                      clrs['n'])
                        output.append(' <> '.join(model_interface['chains']))
                        if delta_area >= 0:
                            emphasis_color = clrs['g']
                            relative_area = 100
                        else:
                            emphasis_color = clrs['r']
                            relative_area = round(
                                model_interface['interface area'] * 100 /
                                template_interface['interface area'], 2)
                        output.append('Delta Interface Area: ' +
                                      emphasis_color + str(delta_area) +
                                      clrs['n'] + ' A^2 (' +
                                      str(relative_area) + '%)')

                        if delta_energy <= 0:
                            emphasis_color = clrs['g']
                            relative_energy = 100
                        else:
                            emphasis_color = clrs['r']
                            if model_interface[
                                    'interface solvation energy'] < 0 and template_interface[
                                        'interface solvation energy'] < 0:
                                relative_energy = round(
                                    model_interface[
                                        'interface solvation energy'] * 100 /
                                    template_interface[
                                        'interface solvation energy'], 2)
                            elif model_interface[
                                    'interface solvation energy'] > 0 and template_interface[
                                        'interface solvation energy'] < 0:
                                relative_energy = 0
                            elif model_interface[
                                    'interface solvation energy'] < 0 and template_interface[
                                        'interface solvation energy'] > 0:
                                relative_energy = 100
                            elif model_interface[
                                    'interface solvation energy'] > 0 and template_interface[
                                        'interface solvation energy'] > 0:
                                relative_energy = 0
                        output.append('Delta Interface Solvation Energy: ' +
                                      emphasis_color + str(delta_energy) +
                                      clrs['n'] + ' kcal/mol (' +
                                      str(relative_energy) + '%)')

                        if model_interface[
                                'hydrogen bonds'] == template_interface[
                                    'hydrogen bonds'] == 0:
                            relative_hb = 0
                            emphasis_color = clrs['r']
                            denominator -= 2
                        elif delta_hb >= 0:
                            relative_hb = 100
                            emphasis_color = clrs['g']
                        else:
                            emphasis_color = clrs['r']
                            relative_hb = round(
                                model_interface['hydrogen bonds'] * 100 /
                                template_interface['hydrogen bonds'], 2)
                        output.append('Delta Hydrogen Bonds: ' +
                                      emphasis_color + str(delta_hb) +
                                      clrs['n'] + ' (' + str(relative_hb) +
                                      '%)')

                        if model_interface[
                                'salt bridges'] == template_interface[
                                    'salt bridges'] == 0:
                            relative_sb = 0
                            emphasis_color = clrs['r']
                            denominator -= 3
                        elif delta_sb >= 0:
                            relative_sb = 100
                            emphasis_color = clrs['g']
                        else:
                            relative_sb = round(
                                model_interface['salt bridges'] * 100 /
                                template_interface['salt bridges'], 2)
                            emphasis_color = clrs['r']
                        output.append('Delta Salt Bridges: ' + emphasis_color +
                                      str(delta_sb) + clrs['n'] + ' (' +
                                      str(relative_sb) + '%)')

                        if model_interface[
                                'disulphide bridges'] == template_interface[
                                    'disulphide bridges'] == 0:
                            relative_ss = 0
                            emphasis_color = clrs['r']
                            denominator -= 4
                        elif delta_ss >= 0:
                            relative_ss = 100
                            emphasis_color = clrs['g']
                        else:
                            relative_ss = round(
                                model_interface['disulphide bridges'] * 100 /
                                template_interface['disulphide bridges'], 2)
                            emphasis_color = clrs['r']
                        output.append('Delta Disulphide Bridges: ' +
                                      emphasis_color + str(delta_ss) +
                                      clrs['n'] + ' (' + str(relative_ss) +
                                      '%)\n')

                        if denominator == 0:
                            comparison_data['score'] = 0
                        else:
                            comparison_data['score'] = round(
                                (relative_area + 2 * relative_energy +
                                 2 * relative_hb + 3 * relative_sb +
                                 4 * relative_ss) / denominator, 2)
                        output.append('Interface score: ' +
                                      str(comparison_data['score']))
                        interfaces_comparison[''.join(
                            sorted(
                                model_interface['chains']))] = comparison_data

            comparison_plots, interfaces_output = plot_deltas(
                model_oligomer_name, template, interfaces_comparison, g_args)
            model_report['comparison_plots'] = os.path.basename(
                comparison_plots)
            output.append(interfaces_output)
            summed_score = 0
            for interface, data in interfaces_comparison.items():
                summed_score += data['score']

            model_report['interfaces_score'] = round(
                summed_score / (10 * len(interfaces_comparison)), 2)
            output.append('Final interfaces score: ' +
                          str(model_report['interfaces_score']))
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0

    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    if 'M' in g_args.assessment:
        output.append(pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        output.append('Rama. Fav.\t' + str(template_molprobity['rama_fav']) +
                      '\t' + str(model_molprobity['rama_fav']))
        output.append('Rama. Out.\t' + str(template_molprobity['rama_out']) +
                      '\t' + str(model_molprobity['rama_out']))
        output.append('Rot. Out.\t' + str(template_molprobity['rot_out']) +
                      '\t' + str(model_molprobity['rot_out']))
        output.append('CBeta Dev.\t' + str(template_molprobity['cb_dev']) +
                      '\t' + str(model_molprobity['cb_dev']))
        output.append('Clashscore\t' + str(template_molprobity['clashscore']) +
                      '\t' + str(model_molprobity['clashscore']))
        output.append('Molprob. Score\t' +
                      str(template_molprobity['molprobity_score']) + '\t' +
                      str(model_molprobity['molprobity_score']))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    if 'M' in g_args.assessment and 'I' in g_args.assessment and not g_args.allow_monomers:
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif 'M' in g_args.assessment:
        model_report['protchoir_score'] = model_report['quality_score']
    elif 'I' in g_args.assessment:
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'
    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    pickle.dump(model_report,
                open(model_oligomer_name + '_CHOIR_model_report.pickle', 'wb'))

    return model_report, '\n'.join(output)