示例#1
0
 def test_dssr_backslash_in_filename(self):
     """
     Loading a PDB file whose path contains a backslash must work with DSSR.

     DSSR copies the input filename into its JSON output. A backslash in
     that name makes the JSON invalid, so the DSSR JSON is patched before
     it is parsed. The test is skipped when DSSR is not installed.
     """
     with make_temp_directory() as tmp_root:
         # On Windows the backslash is a path separator, so "bla" is a
         # directory; on other systems it is simply part of the file name.
         pdb_path = os.path.join(tmp_root, "bla\\something.pdb")
         parent_dir = os.path.split(pdb_path)[0]
         try:
             # Creates "bla" on Windows; elsewhere the directory is the
             # (already existing) temporary directory.
             os.makedirs(parent_dir)
         except OSError:
             # The directory is already there.
             pass
         shutil.copy('test/forgi/threedee/data/1y26.pdb', pdb_path)
         try:
             # The call itself must not raise; exactly one structure is
             # expected back.
             cg, = ftmc.CoarseGrainRNA.from_pdb(
                 pdb_path, annotation_tool="DSSR")
         except ftmc.AnnotationToolNotInstalled:
             self.skipTest("This Test requires DSSR")
     self.check_graph_integrity(cg)
     self.assertGreater(len(cg.defines), 2)
示例#2
0
 def test_dssr_backslash_in_filename(self):
     """
     Regression test for backslashes in the filename passed to DSSR.

     The input filename ends up in the JSON written by DSSR, and a
     backslash in it would make that JSON invalid. The JSON is therefore
     patched before parsing; this test verifies that no error is raised.
     """
     source_pdb = 'test/forgi/threedee/data/1y26.pdb'
     with make_temp_directory() as workdir:
         # Windows: "bla" is a directory and the backslash a separator.
         # Everywhere else: the backslash belongs to the file name.
         target = os.path.join(workdir, "bla\\something.pdb")
         folder = os.path.dirname(target)
         try:
             os.makedirs(folder)
         except OSError:
             # Nothing to create, the folder exists already.
             pass
         shutil.copy(source_pdb, target)
         try:
             # No exception may escape from here (apart from the skip).
             cg, = ftmc.CoarseGrainRNA.from_pdb(target,
                                                annotation_tool="DSSR")
         except ftmc.AnnotationToolNotInstalled:
             self.skipTest("This Test requires DSSR")
     self.check_graph_integrity(cg)
     self.assertGreater(len(cg.defines), 2)
示例#3
0
def main(args):
    """
    Show the coarse-grained RNAs selected by ``args`` in PyMOL.

    The coarse-grained elements of all RNAs are written to a PyMOL script
    (``args.pymol_file`` if given, otherwise a file inside a temporary
    directory). Every RNA that has chains is additionally written to a
    ``.cif`` file, and one PyMOL selection per coarse-grained element is
    generated. Finally PyMOL is started on those files and on a generated
    ``command.pml``; its stdout/stderr are logged at INFO level.

    :param args: Parsed command-line arguments. Attributes read here:
                 ``align``, ``labels``, ``pymol_file``, ``only_elements``,
                 ``output`` and ``batch``. ``args`` is also handed to
                 ``fuc.cgs_from_args`` and ``pymol_printer_from_args``.
    :raises ValueError: If an entry of ``--labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    labels = {}
    if args.labels:
        for label in args.labels.split(","):
            if not label:
                # Tolerate empty entries, e.g. from a trailing comma.
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}.".format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output was not requested explicitly: switch it on, but
            # give every element without a label an empty one.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True

    # Each further RNA is drawn with a smaller color modifier (factor 0.7).
    color_modifier = 1.0
    log.info("Visualizing {} rnas".format(len(rnas)))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            # One named PyMOL selection per coarse-grained element.
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                resi_expr = "+".join(map(str, (r.resid[1] for r in resids)))
                chains = {r.chain for r in resids}
                sel = [
                    "( %{} and chain {} and resi {}) ".format(
                        obj_name, c, resi_expr) for c in chains
                ]
                selections += "select {}, ".format(
                    d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'
        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)

                # Fix: this loop used an undefined name ``cg``. Look the
                # element up in every loaded RNA that defines it instead.
                for rna in rnas:
                    if constraint not in rna.defines:
                        continue
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        # Use the residue number the same way the
                        # selections above do (``r.resid[1]``).
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum,)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections
        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # '-cq' is passed only in batch mode.
        pymol_call = ['pymol', '-cq'] if args.batch else ['pymol']
        p = sp.Popen(pymol_call + pdb_fns + [pml_filename],
                     stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
示例#4
0
def mend_breakpoints(chains, gap):
    """
    Try to close backbone gaps in the given chains with ModeRNA.

    If ModeRNA cannot be imported, a warning is issued and ``chains`` is
    returned unchanged. Gaps whose two ends lie on different chains are
    skipped with a warning.

    :param chains: A dict mapping chain ids to chain objects.
    :param gap: A list of res_ids, which can be moved to mend the gap.
                Each entry ``g`` is used as a pair ``g[0]``, ``g[1]`` whose
                members have a ``chain`` attribute.
    :returns: A dict with the same keys as ``chains``. Each value is the
              single child of the first model of ``get_structure()`` called
              on the (possibly mended) chain, with its id set to the key.
    """
    try:
        import moderna
    except ImportError:
        warnings.warn(
            "Cannot mend gaps in sequence, because ModeRNA is not installed!")
        return chains
    mod_models = {}
    # NOTE(review): tmpdir is not used inside this block any more; the
    # context manager is kept so that behavior stays unchanged.
    with fus.make_temp_directory() as tmpdir:
        log.info("Writing chains %s", chains.values())

        for g in gap:
            chain_id = g[0].chain
            if chain_id != g[1].chain:
                log.warning(
                    "Not mending gap between multiple chains: %s and %s", g[0],
                    g[1])
                continue
            if chain_id not in mod_models:
                # Load each chain into ModeRNA only once; later gaps on the
                # same chain reuse (and further modify) the same model.
                try:
                    mod_models[chain_id] = moderna.load_model(
                        chains[chain_id], data_type="chain")
                except Exception as e:
                    with log_to_exception(log, e):
                        log.error("g is %s, g[0] is %s, g[0].chain is %s", g,
                                  g[0], g[0].chain)
                        log.error("chains is %s", chains)
                    raise
            moderna.fix_backbone(mod_models[chain_id],
                                 resid_to_moderna(g[0]),
                                 resid_to_moderna(g[1]))

        # Collect the mended models, falling back to the original chain
        # wherever nothing was mended.
        mended_chains = {}
        for chain_id in chains:
            if chain_id in mod_models:
                mended_chains[chain_id] = mod_models[chain_id]
                # Fix: the message lacked a %s placeholder for its argument.
                log.info("Mended: %s", mended_chains)
                mended_chains[chain_id].id = chain_id
            else:
                mended_chains[chain_id] = chains[chain_id]
    log.info("mended_chains: %s", mended_chains)
    # Moderna may replace modified residues with "UNK" for unknown or
    # otherwise change the code. We have to replace them back.
    for chain_id in chains:
        for res in mended_chains[chain_id]:
            changed = False
            for o_res in chains[chain_id]:
                # Match on everything but the first id field
                # (i.e. on number + insertion code).
                if o_res.id[1:] == res.id[1:]:
                    log.debug("Changing Moderna residue %s to %s", res, o_res)
                    assert not changed  # Only one residue per number+icode
                    res.id = o_res.id
                    res.resname = o_res.resname
                    log.debug("Moderna residue now %s", res)
                    changed = True
    # Convert back from ModeRNA to Biopython
    # NOTE(review): get_structure() is also called on chains that were not
    # mended -- confirm that the input chain type provides it.
    out_chains = {}
    for k, v in mended_chains.items():
        s = v.get_structure()[0]
        # Purely diagnostic output, so it is logged at DEBUG, not ERROR.
        log.debug("%s, %s %s", k, s, s.child_dict)
        assert len(s.child_list) == 1
        out_chains[k] = s.child_list[0]
        out_chains[k].id = k
    return out_chains
示例#5
0
def main():
    """
    Command-line entry point: display coarse-grain structures in PyMOL.

    Parses the options with optparse, loads every positional argument as a
    ``cmg.CoarseGrainRNA``, hands the structures to a ``cvp.PymolPrinter``
    and starts PyMOL on the generated script. When no cg file is given the
    help is printed and the process exits with status 1.

    Highlighting (``--highlight``), ``--distance`` and
    ``--residue-distance`` use ``cg`` as left over from the drawing loop,
    i.e. they refer to the last structure given on the command line.
    """
    usage = """
    ./visualize_cg.py cg_file

    Display the coarse-grain representation of a structure in pymol.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None,
                      help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help="Create a picture of the scene and exit",
                      type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False,
                      action='store_true',
                      help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True,
                      action='store_false',
                      help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False,
                      action='store_true',
                      help="Display cones that portrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False,
                      action='store_true',
                      help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False,
                      action='store_true',
                      help='Align all of the structures with the first')
    parser.add_option(
        '-e', '--encompassing-stems', dest='encompassing_stems',
        default=False, action='store_true',
        help='Show the big stems that encompass the colinear ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms',
                      default=False, action='store_true',
                      help='Display the virtual atoms')
    parser.add_option('-d', '--distance', dest='distance', default=None,
                      help="Draw the lines between specified virtual residues")
    parser.add_option('-t', '--residue-distance', dest='residue_distance',
                      default=None,
                      help="Draw a line between residue distances")
    parser.add_option('-b', '--basis', dest='basis', default=False,
                      action='store_true',
                      help='Display the coordinate basis of each element')
    parser.add_option('', '--stem-color', dest='stem_color', default='green',
                      help='The default color in coarse-grain drawings')
    parser.add_option('', '--multiloop-color', dest='multiloop_color',
                      default='red',
                      help='The default color in coarse-grain drawings')
    parser.add_option('', '--batch', dest='batch', default=False,
                      action='store_true',
                      help='Start pymol in batch mode')
    parser.add_option(
        '', '--sidechain-atoms', dest='sidechain_atoms', default=False,
        action='store_true',
        help='Include the sidechain atoms. Automatically enables --virtual-atoms')
    # Fix: the help text used a backslash continuation inside the literal,
    # which embedded a long run of spaces in the displayed help.
    parser.add_option(
        '', '--rainbow', dest='rainbow', default=False, action='store_true',
        help='Color each of the nucleotide positions (i.e. average atoms) '
        'according to the colors of the rainbow and their position')
    parser.add_option('', '--only-elements', dest='only_elements',
                      default=None,
                      help='Display only these elements '
                      'element names should be '
                      'separated by commas')
    # Fix: a space was missing between "elements" and "gradually".
    parser.add_option('', '--color-gradual', dest='color_gradual',
                      default=None,
                      help='Color the specified elements '
                      'gradually from one to the other, example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    # Transfer the command-line switches onto the printer.
    pp = cvp.PymolPrinter()
    pp.stem_color = options.stem_color
    pp.multiloop_color = options.multiloop_color
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # Everything that is not listed explicitly is drawn in black.
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            gradual_color = cmap(i / float(len(to_color_nodes)))
            print(node, gradual_color)
            pp.element_specific_colors[node] = gradual_color

    for i, cg in enumerate(cgs):
        if i > 0:
            # Every structure after the first gets color modifier .3.
            pp.color_modifier = .3

        pp.coordinates_to_pymol(cg)

    # highlight things in purple
    if options.highlight is not None:
        for s in options.highlight.split(','):
            fud.pv('s')
            pp.add_twists = False
            pp.add_stem_like(cg, s, color='purple', width=3.)

    # display the distances between nucleotides
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(cg, sidechain=False)

        # Format: "from,to:from,to:..."
        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')

            fr = int(fr)
            to = int(to)

            pp.add_dashed(virtual_atoms[fr]["C1'"],
                          virtual_atoms[to]["C1'"],
                          width=1.2)

    if options.residue_distance is not None:
        dist_pair = options.residue_distance
        fr, to = dist_pair.split(',')

        fr = int(fr)
        to = int(to)

        node1 = cg.get_node_from_residue_num(to)
        node2 = cg.get_node_from_residue_num(fr)

        pos1, len1 = cg.get_position_in_element(to)
        pos2, len2 = cg.get_position_in_element(fr)

        vec1 = cg.coords[node1][1] - cg.coords[node1][0]
        vec2 = cg.coords[node2][1] - cg.coords[node2][0]

        # Place each sphere along its element: start coordinate plus the
        # element vector scaled by pos/len.
        mid1 = cg.coords[node1][0] + pos1 * (vec1 / len1)
        mid2 = cg.coords[node2][0] + pos2 * (vec2 / len2)

        pp.add_sphere(mid1, 'green', width=2)
        pp.add_sphere(mid2, 'red', width=2)

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())
        # The file for running pymol
        pymol_cmd = 'hide all\n'
        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += 'orient\n'
        if options.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (options.output)
            pymol_cmd += 'quit\n'
        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)
        # '-cq' is passed only in batch mode.
        pymol_call = ['pymol', '-cq'] if options.batch else ['pymol']
        p = sp.Popen(pymol_call + [pml_filename],
                     stdout=sp.PIPE,
                     stderr=sp.PIPE)
        # Wait for PyMOL to finish before the temporary directory is left.
        out, err = p.communicate()
示例#6
0
def pdb_to_json(text, name, parser=None):
    '''
    Create a graph-layout displaying a pdb file which
    presumably contains some RNA

    The text is the contents of the pdb file. It is written to a temporary
    file, parsed, and every protein or RNA chain is turned into one
    molecule entry. Contacts between chains become extra links.

    :param text: The text of the pdb file.
    :param name: The name of the pdb file. Used for the temporary file name
                 and as prefix of every molecule header.
    :param parser: The PDB parser to use (Bio.PDB.PDBParser or
                   Bio.PDB.MMCIFParser).
                   NOTE(review): despite the default of None,
                   ``parser.get_structure`` is called unconditionally, so a
                   parser has to be passed.
    :return: A dict with the keys "molecules" (one dict per protein/RNA
             chain) and "extra_links" (one dict per inter-chain contact).
    '''
    with fus.make_temp_directory() as output_dir:
        fname = op.join(output_dir, '{}.pdb'.format(name))

        # dump the pdb text to a temporary file; leaving the with-block
        # closes the file, so everything is on disk before it is parsed
        with open(fname, 'w') as f:
            f.write(text)

        struct = parser.get_structure('temp', fname)
        chains = struct.get_chains()

        molecules = []

        proteins = set()
        rnas = set()

        cgs = dict()

        for chain in chains:
            # create a graph json for each structure in the pdb file
            if ftup.is_protein(chain):
                # Fix: was a Python-2-only ``print >> sys.stderr`` statement,
                # which raises a TypeError under Python 3.
                sys.stderr.write("protein {}\n".format(chain))
                proteins.add(chain.id)
                # A protein is represented by a single node (one uid).
                molecules.append({
                    "type": "protein",
                    "header": "{}_{}".format(name, chain.id),
                    "seq": "",
                    "ss": "",
                    "size": len(chain.get_list()),
                    "uids": [uuid.uuid4().hex]
                })
            elif ftup.is_rna(chain):
                sys.stderr.write("rna {}\n".format(chain))
                rnas.add(chain.id)
                # process RNA molecules (hopefully)
                # The positions are computed from the structure loaded with
                # pseudoknots removed; the structure that is reported is
                # loaded a second time with pseudoknots kept.
                cg = ftmc.from_pdb(fname,
                                   chain_id=chain.id,
                                   remove_pseudoknots=True,
                                   parser=parser)
                positions = fasta_to_positions(cg.to_fasta_string())
                cg = ftmc.from_pdb(fname,
                                   chain_id=chain.id,
                                   remove_pseudoknots=False,
                                   parser=parser)

                cgs[chain.id] = cg
                molecules.append({
                    "type": "rna",
                    "header": "{}_{}".format(name, chain.id),
                    "seq": cg.seq,
                    "ss": cg.to_dotbracket_string(),
                    "size": cg.seq_length,
                    "uids": [uuid.uuid4().hex for i in range(cg.seq_length)],
                    "positions": positions
                })
            else:
                # hetatm type chains which are present in MMCIF files
                pass

        # create a lookup table linking the id and residue number to the uid of
        # that nucleotide and residue number
        node_ids = dict()
        for m in molecules:
            for i, uid in enumerate(m['uids']):
                node_ids["{}_{}".format(m['header'], i + 1)] = uid

        links = []
        for (a1, a2) in ftup.interchain_contacts(struct):
            if (a1.parent.id[0] != ' ' or a2.parent.id[0] != ' '):
                # hetatm's will be ignored for now
                continue

            chain1 = a1.parent.parent.id
            chain2 = a2.parent.parent.id

            # Links refer to nodes by uid. A protein has a single node
            # (index 1); an RNA has one node per nucleotide, addressed by
            # the 1-based position of the residue in the chain's seq_ids.
            if (chain1 in proteins and chain2 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain2].seq_ids.index(a2.parent.id)

                links.append({
                    "source": node_ids["{}_{}_{}".format(name, chain2, sid + 1)],
                    "target": node_ids["{}_{}_{}".format(name, chain1, 1)],
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in proteins and chain1 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain1].seq_ids.index(a1.parent.id)

                links.append({
                    "source": node_ids["{}_{}_{}".format(name, chain1, sid + 1)],
                    "target": node_ids["{}_{}_{}".format(name, chain2, 1)],
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in rnas and chain1 in rnas):
                # get the index of both nucleotides in the secondary structure
                sid1 = cgs[chain1].seq_ids.index(a1.parent.id)
                sid2 = cgs[chain2].seq_ids.index(a2.parent.id)

                links.append({
                    "source": node_ids["{}_{}_{}".format(name, chain1, sid1 + 1)],
                    "target": node_ids["{}_{}_{}".format(name, chain2, sid2 + 1)],
                    "link_type": "chain_chain",
                    "value": 3
                })

        return {"molecules": molecules, "extra_links": links}
示例#7
0
def main(args):
    """
    Show the coarse-grained RNAs selected by ``args`` in PyMOL.

    The coarse-grained elements of all RNAs are written to a PyMOL script
    (``args.pymol_file`` if given, otherwise a file inside a temporary
    directory). Every RNA that has chains is additionally written to a
    ``.cif`` file, and one PyMOL selection per coarse-grained element is
    generated. Finally PyMOL is started on those files and on a generated
    ``command.pml``; its stdout/stderr are logged at INFO level.

    :param args: Parsed command-line arguments. Attributes read here:
                 ``align``, ``labels``, ``pymol_file``, ``only_elements``,
                 ``output`` and ``batch``. ``args`` is also handed to
                 ``fuc.cgs_from_args`` and ``pymol_printer_from_args``.
    :raises ValueError: If an entry of ``--labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    labels = {}
    if args.labels:
        for label in args.labels.split(","):
            if not label:
                # Tolerate empty entries, e.g. from a trailing comma.
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}."
                    .format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output was not requested explicitly: switch it on, but
            # give every element without a label an empty one.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True

    # Each further RNA is drawn with a smaller color modifier (factor 0.7).
    color_modifier = 1.0
    log.info("Visualizing {} rnas".format(len(rnas)))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            # One named PyMOL selection per coarse-grained element.
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                resi_expr = "+".join(map(str, (r.resid[1] for r in resids)))
                chains = {r.chain for r in resids}
                sel = [
                    "( %{} and chain {} and resi {}) ".format(
                        obj_name, c, resi_expr) for c in chains
                ]
                selections += "select {}, ".format(
                    d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'
        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)

                # Fix: this loop used an undefined name ``cg``. Look the
                # element up in every loaded RNA that defines it instead.
                for rna in rnas:
                    if constraint not in rna.defines:
                        continue
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        # Use the residue number the same way the
                        # selections above do (``r.resid[1]``).
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum,)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections
        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # '-cq' is passed only in batch mode.
        pymol_call = ['pymol', '-cq'] if args.batch else ['pymol']
        p = sp.Popen(pymol_call + pdb_fns + [pml_filename],
                     stdout=sp.PIPE,
                     stderr=sp.PIPE)
        log.info("Now opening pymol")
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
示例#8
0
def pdb_to_json(text, name):
    '''
    Create a graph-layout displaying a pdb file which
    presumably contains some RNA

    The text is the contents of the pdb file. It is written to a temporary
    file and parsed with ``bpdb.PDBParser``. Every chain for which
    ``ftup.is_protein`` is true becomes a single-node graph; every other
    chain is treated as RNA and converted with ``bg_to_json``.

    :param text: The text of the pdb file.
    :param name: The name of the pdb file. Used for the temporary file name
                 and as prefix of the protein nodes' ``struct_name``.
    :return: A dict with the keys "jsons" (one graph per chain plus a final
             graph that only holds the inter-chain links) and "extra_links"
             (the same inter-chain links).
    '''
    with fus.make_temp_directory() as output_dir:
        fname = op.join(output_dir, '{}.pdb'.format(name))

        # dump the pdb text to a temporary file
        # Fix: the original wrote ``f.flush`` without calling it and parsed
        # the file while it was still open, so buffered text might not have
        # been on disk yet. Closing the file first guarantees that it is.
        with open(fname, 'w') as f:
            f.write(text)

        struct = bpdb.PDBParser().get_structure('temp', fname)
        chains = struct.get_chains()

        jsons = []

        proteins = set()
        rnas = set()

        cgs = dict()

        for chain in chains:
            # create a graph json for each structure in the pdb file
            if ftup.is_protein(chain):
                proteins.add(chain.id)
                # A protein is represented by one node and no links.
                jsons.append({
                    "nodes": [{
                        "group": 2,
                        "struct_name": "{}_{}".format(name, chain.id),
                        "id": 1,
                        "size": len(chain.get_list()),
                        "name": chain.id,
                        "node_type": "protein"
                    }],
                    "links": []
                })
            else:
                rnas.add(chain.id)
                # process RNA molecules (hopefully)
                cg = ftmc.from_pdb(fname, chain_id=chain.id)
                cgs[chain.id] = cg
                jsons.append(bg_to_json(cg))

        # create a lookup table to find out the index of each node in the
        # what will eventually become the large list of nodes
        counter = 0
        node_ids = dict()
        for j in jsons:
            for n in j['nodes']:
                node_ids["{}_{}".format(n['struct_name'], n['id'])] = counter
                counter += 1

        links = []
        for (a1, a2) in ftup.interchain_contacts(struct):
            if (a1.parent.id[0] != ' ' or a2.parent.id[0] != ' '):
                # hetatm's will be ignored for now
                continue

            chain1 = a1.parent.parent.id
            chain2 = a2.parent.parent.id

            # the source and target values below need to be reduced by the length of the
            # nodes array because when the jsons are added to the graph, the link
            # source and target are incremented so as to correspond to the new indices
            # of the nodes
            # so a link to a node at position 10, if there are 50 nodes, will have to have
            # a source value of -40
            if (chain1 in proteins and chain2 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain2].seq_ids.index(a2.parent.id)

                links.append({
                    "source":
                    node_ids["{}_{}_{}".format(name, chain2, sid + 1)] -
                    counter,
                    "target":
                    node_ids["{}_{}_{}".format(name, chain1, 1)] - counter,
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in proteins and chain1 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain1].seq_ids.index(a1.parent.id)

                links.append({
                    "source":
                    node_ids["{}_{}_{}".format(name, chain1, sid + 1)] -
                    counter,
                    "target":
                    node_ids["{}_{}_{}".format(name, chain2, 1)] - counter,
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in rnas and chain1 in rnas):
                # get the index of both nucleotides in the secondary structure
                sid1 = cgs[chain1].seq_ids.index(a1.parent.id)
                sid2 = cgs[chain2].seq_ids.index(a2.parent.id)

                links.append({
                    "source":
                    node_ids["{}_{}_{}".format(name, chain1, sid1 + 1)] -
                    counter,
                    "target":
                    node_ids["{}_{}_{}".format(name, chain2, sid2 + 1)] -
                    counter,
                    "link_type": "chain_chain",
                    "value": 3
                })

        # The inter-chain links are added as a node-less graph of their own
        # and are returned separately as well.
        jsons.append({"nodes": [], "links": links})
        return {"jsons": jsons, "extra_links": links}