Exemplo n.º 1
0
    def test_mcc(self):
        """Check the MCC of a structure against a sampled one and against itself.

        The MCC derived from the confusion matrix of the two structure files
        must be below 1, while comparing a structure with itself must give a
        value within 0.01 of 1.
        """
        native = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A.cg')
        sampled = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A_sampled.cg')

        # Native vs. sampled: the correlation must be strictly below 1.
        # assertLess reports both operands on failure, unlike assertTrue(a < b).
        mcc = ftme.mcc(ftme.confusion_matrix(native, sampled))
        self.assertLess(mcc, 1.0)

        # A structure compared with itself must correlate (almost) perfectly.
        mcc = ftme.mcc(ftme.confusion_matrix(sampled, sampled))
        self.assertLess(abs(mcc - 1.0), 0.01)
Exemplo n.º 2
0
def main(args):
    """Compare two 3D coarse-grained structures and print the selected measures.

    The measures are selected by ``args.acc``, ``args.rmsd`` and
    ``args.pdb_rmsd``; if none of them is set, all measures are attempted.
    A measure that was explicitly requested but cannot be computed ends the
    program with exit status 1.

    :param args: The parsed command line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, rna_type="3d", enable_logging=True)

        # No explicit selection means: report everything that can be computed.
        showall = not (args.acc or args.rmsd or args.pdb_rmsd)

        if showall or args.acc:
            defines_differ = cg1.defines != cg2.defines
            if not defines_differ:
                correlation = ftms.AdjacencyCorrelation(cg1)
                print("ACC:\t{:.3f}".format(ftms.mcc(correlation.evaluate(cg2))))
            elif args.acc:
                # Only an error if the ACC was explicitly asked for;
                # otherwise the measure is silently skipped.
                print(
                    "Cannot compare two 3d structures that do not correspond to the same RNA."
                )
                sys.exit(1)

        if showall or args.rmsd:
            print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))

        if showall or args.pdb_rmsd:
            # NOTE(review): pdb_rmsd() is defined elsewhere; it presumably
            # reports the value itself and returns a falsy result on failure.
            succeeded = pdb_rmsd(cg1, cg2)
            # If --pdb-rmsd was not given, just don't print it.
            # If it was given, we exit with non-zero exit status.
            if not succeeded and args.pdb_rmsd:
                print(
                    "Cannot calculate PDB-RMSD: The two files do not contain the same chains."
                )
                sys.exit(1)
Exemplo n.º 3
0
def main():
    """Print the confusion matrix, MCC and RMSD of two coarse-grained structures.

    The two structure files are read from the command line. If fewer than two
    positional arguments are given, the usage message is printed and the
    program exits with status 1.
    """
    usage = """
    python calculate_mcc.py struct1.cg struct2.cg
    """
    # Both args[0] and args[1] are used below, so two arguments are required.
    num_args = 2
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg1 = ftmc.CoarseGrainRNA(args[0])
    cg2 = ftmc.CoarseGrainRNA(args[1])

    confusion_matrix = ftme.confusion_matrix(cg1, cg2)

    # A single pre-formatted argument prints identically under Python 2
    # (print statement) and Python 3 (print function).
    print("confusion_matrix: {}".format(confusion_matrix))
    print("mcc: {}".format(ftme.mcc(confusion_matrix)))
    print("rmsd: {}".format(ftme.cg_rmsd(cg1, cg2)))
Exemplo n.º 4
0
def main(args):
    """Print comparison measures for two 3D coarse-grained RNA structures.

    ``args.acc``, ``args.rmsd`` and ``args.pdb_rmsd`` choose the measures.
    With none of them set, every measure is attempted; a measure that was
    explicitly requested but is not computable causes exit status 1.

    :param args: The parsed command line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(
            args, rna_type="3d", enable_logging=True)

        # Nothing selected explicitly -> show all measures.
        showall = not (args.acc or args.rmsd or args.pdb_rmsd)

        if showall or args.acc:
            incompatible = cg1.defines != cg2.defines
            if not incompatible:
                acc_calculator = ftms.AdjacencyCorrelation(cg1)
                acc_value = ftms.mcc(acc_calculator.evaluate(cg2))
                print("ACC:\t{:.3f}".format(acc_value))
            elif args.acc:
                # The ACC was explicitly requested, so failing is an error.
                print(
                    "Cannot compare two 3d structures that do not correspond to the same RNA.")
                sys.exit(1)

        if showall or args.rmsd:
            rmsd_value = ftms.cg_rmsd(cg1, cg2)
            print("RMSD:\t{:.3f}".format(rmsd_value))

        if showall or args.pdb_rmsd:
            # NOTE(review): pdb_rmsd() lives outside this block; it is treated
            # here as returning a falsy value when it could not be calculated.
            pdb_ok = pdb_rmsd(cg1, cg2)
            # If --pdb-rmsd was not given, just don't print it.
            # If it was given, we exit with non-zero exit status.
            if not pdb_ok and args.pdb_rmsd:
                print(
                    "Cannot calculate PDB-RMSD: The two files do not contain the same chains.")
                sys.exit(1)
Exemplo n.º 5
0
    def test_new_confusionmatrix_is_like_old(self):
        """AdjacencyCorrelation must yield the same MCC as confusion_matrix()."""
        native = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A.cg')
        sampled = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A_sampled.cg')

        # Native vs. sampled structure, old function first.
        mcc_old = ftme.mcc(confusion_matrix(native, sampled))
        # AdjacencyCorrelation was previously named ConfusionMatrix.
        adjacency = ftme.AdjacencyCorrelation(native)
        mcc_new = ftme.mcc(adjacency.evaluate(sampled))
        self.assertAlmostEqual(mcc_old, mcc_new)
        self.assertAlmostEqual(mcc_new, 0.6756639246921762)

        # Native structure against itself: perfect correlation expected.
        mcc_old = ftme.mcc(confusion_matrix(native, native))
        mcc_new = ftme.mcc(adjacency.evaluate(native))
        self.assertAlmostEqual(mcc_old, mcc_new)
        self.assertAlmostEqual(mcc_new, 1.0)
Exemplo n.º 6
0
    def test_mcc(self):
        """Check the MCC of a structure against a sampled one and against itself.

        The MCC derived from the confusion matrix of the two structure files
        must be below 1, while comparing a structure with itself must give a
        value within 0.01 of 1.
        """
        native = ftmc.CoarseGrainRNA.from_bg_file(
            'test/forgi/threedee/data/1GID_A.cg')
        sampled = ftmc.CoarseGrainRNA.from_bg_file(
            'test/forgi/threedee/data/1GID_A_sampled.cg')

        # Native vs. sampled: the correlation must be strictly below 1.
        # assertLess reports both operands on failure, unlike assertTrue(a < b).
        mcc = ftme.mcc(confusion_matrix(native, sampled))
        self.assertLess(mcc, 1.0)

        # A structure compared with itself must correlate (almost) perfectly.
        mcc = ftme.mcc(confusion_matrix(sampled, sampled))
        self.assertLess(abs(mcc - 1.0), 0.01)
Exemplo n.º 7
0
    def test_new_confusionmatrix_is_like_old(self):
        """AdjacencyCorrelation must yield the same MCC as ftme.confusion_matrix()."""
        native = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A.cg')
        sampled = ftmc.CoarseGrainRNA('test/forgi/threedee/data/1GID_A_sampled.cg')

        # Native vs. sampled structure, old function first.
        mcc_old = ftme.mcc(ftme.confusion_matrix(native, sampled))
        # AdjacencyCorrelation was previously named ConfusionMatrix.
        adjacency = ftme.AdjacencyCorrelation(native)
        mcc_new = ftme.mcc(adjacency.evaluate(sampled))
        self.assertAlmostEqual(mcc_old, mcc_new)
        # Reference values: 0.086 for distance=30, 0.0761 for 25.
        deviation = abs(mcc_new - 0.0761)
        self.assertLess(deviation, 0.0001)

        # Native structure against itself: perfect correlation expected.
        mcc_old = ftme.mcc(ftme.confusion_matrix(native, native))
        mcc_new = ftme.mcc(adjacency.evaluate(native))
        self.assertAlmostEqual(mcc_old, mcc_new)
        self.assertAlmostEqual(mcc_new, 1.0)
Exemplo n.º 8
0
 def update(self, sm, step):
     """Compute the MCC-based value for the current step and record it.

     :param sm: The spatial model; its ``bg`` attribute is evaluated.
     :param step: The current step (not used here).
     :returns: The value formatted as ``"{:5.3f}"``, or None if
               ``self.silent`` is set.
     """
     if self.silent:
         return
     try:
         adjacency = self._cm_calc.evaluate(sm.bg)
         acc = ftme.mcc(adjacency)
     except ZeroDivisionError:
         # The MCC is undefined here. Record NaN anyway so that the history
         # keeps exactly one entry per step.
         acc = float("nan")
     self.history[0].append(acc)
     return "{:5.3f}".format(acc)
Exemplo n.º 9
0
    def test_new_confusionmatrix_is_like_old(self):
        """AdjacencyCorrelation must yield the same MCC as confusion_matrix()."""
        native = ftmc.CoarseGrainRNA.from_bg_file(
            'test/forgi/threedee/data/1GID_A.cg')
        sampled = ftmc.CoarseGrainRNA.from_bg_file(
            'test/forgi/threedee/data/1GID_A_sampled.cg')

        # Native vs. sampled structure, old function first.
        mcc_old = ftme.mcc(confusion_matrix(native, sampled))
        # AdjacencyCorrelation was previously named ConfusionMatrix.
        adjacency = ftme.AdjacencyCorrelation(native)
        mcc_new = ftme.mcc(adjacency.evaluate(sampled))
        self.assertAlmostEqual(mcc_old, mcc_new)
        self.assertAlmostEqual(mcc_new, 0.6756639246921762)

        # Native structure against itself: perfect correlation expected.
        mcc_old = ftme.mcc(confusion_matrix(native, native))
        mcc_new = ftme.mcc(adjacency.evaluate(native))
        self.assertAlmostEqual(mcc_old, mcc_new)
        self.assertAlmostEqual(mcc_new, 1.0)
Exemplo n.º 10
0
 def update(self, sm, step):
     """Compute the MCC-based value for the current step and record it.

     :param sm: The spatial model; its ``bg`` attribute is evaluated.
     :param step: The current step (not used here).
     :returns: The value formatted as ``"{:5.3f}"``, or None if
               ``self.silent`` is set.
     """
     if self.silent:
         return
     try:
         value = ftme.mcc(self._cm_calc.evaluate(sm.bg))
     except ZeroDivisionError:
         # Undefined MCC: NaN is recorded and reported instead.
         value = float("nan")
     self.history[0].append(value)
     return "{:5.3f}".format(value)
Exemplo n.º 11
0
def main(args):
    """Compare coarse-grained structures to a reference structure.

    With exactly one file in ``args.compareTo`` only the coarse-grained RMSD
    to ``args.reference[0]`` is printed. Otherwise a tab-separated table with
    RMSD, dRMSD and ACC is printed, one row per file.

    :param args: The parsed command line arguments.
    """
    if len(args.compareTo) == 1:
        reference_cg = ftmc.CoarseGrainRNA(args.reference[0])
        other_cg = ftmc.CoarseGrainRNA(args.compareTo[0])
        print(ftms.cg_rmsd(reference_cg, other_cg))
        return

    header_format = "{:15}\t{:6}\t{:6}\t{:6}"
    row_format = "{:15}\t{:6.3f}\t{:6.3f}\t{:6.3f}"

    # The header is printed before the reference structure is loaded.
    print(header_format.format("filename", "RMSD", "dRMSD", "ACC"))
    reference_cg = ftmc.CoarseGrainRNA(args.reference[0])
    reference_coords = reference_cg.get_ordered_virtual_residue_poss()
    acc_calculator = ftms.AdjacencyCorrelation(reference_cg)
    for path in args.compareTo:
        current_cg = ftmc.CoarseGrainRNA(path)
        current_coords = current_cg.get_ordered_virtual_residue_poss()
        rmsd_value = ftur.rmsd(reference_coords, current_coords)
        drmsd_value = ftur.drmsd(reference_coords, current_coords)
        acc_value = ftms.mcc(acc_calculator.evaluate(current_cg))
        # Only the last 15 characters of the filename are shown.
        print(row_format.format(path[-15:], rmsd_value, drmsd_value, acc_value))
Exemplo n.º 12
0
def main(args):
    """Compare coarse-grained structures to a reference structure.

    With exactly one file in ``args.compareTo`` only the coarse-grained RMSD
    to ``args.reference[0]`` is printed. Otherwise a tab-separated table with
    RMSD, dRMSD and ACC is printed, one row per file.

    :param args: The parsed command line arguments.
    """
    single_comparison = len(args.compareTo) == 1
    if single_comparison:
        first = ftmc.CoarseGrainRNA(args.reference[0])
        second = ftmc.CoarseGrainRNA(args.compareTo[0])
        print(ftms.cg_rmsd(first, second))
        return

    # The header is printed before the reference structure is loaded.
    column_titles = ("filename", "RMSD", "dRMSD", "ACC")
    print("{:15}\t{:6}\t{:6}\t{:6}".format(*column_titles))

    ref_structure = ftmc.CoarseGrainRNA(args.reference[0])
    ref_positions = ref_structure.get_ordered_virtual_residue_poss()
    correlation = ftms.AdjacencyCorrelation(ref_structure)
    for fname in args.compareTo:
        structure = ftmc.CoarseGrainRNA(fname)
        positions = structure.get_ordered_virtual_residue_poss()
        row = (
            fname[-15:],  # only the last 15 characters of the filename
            ftur.rmsd(ref_positions, positions),
            ftur.drmsd(ref_positions, positions),
            ftms.mcc(correlation.evaluate(structure)),
        )
        print("{:15}\t{:6.3f}\t{:6.3f}\t{:6.3f}".format(*row))
Exemplo n.º 13
0
def main():
    """Print the confusion matrix, MCC and RMSD of two coarse-grained structures.

    The two structure files are read from the command line. If fewer than two
    positional arguments are given, the usage message is printed and the
    program exits with status 1.
    """
    usage = """
    python calculate_mcc.py struct1.cg struct2.cg
    """
    # Both args[0] and args[1] are used below, so two arguments are required.
    num_args = 2
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg1 = ftmc.CoarseGrainRNA(args[0])
    cg2 = ftmc.CoarseGrainRNA(args[1])

    confusion_matrix = ftme.confusion_matrix(cg1, cg2)

    # A single pre-formatted argument prints identically under Python 2
    # (print statement) and Python 3 (print function).
    print("confusion_matrix: {}".format(confusion_matrix))
    print("mcc: {}".format(ftme.mcc(confusion_matrix)))
    print("rmsd: {}".format(ftme.cg_rmsd(cg1, cg2)))
Exemplo n.º 14
0
def main():
    """Convert coarse grain files into fornac HTML and print it to stdout.

    Every file given on the command line is loaded, its extra links are
    extracted and its MCC (relative to the first file) and RMSD (relative to
    the first file) are computed. The structures are then ordered either by
    descending MCC or by ``reorder_structs`` (``--sort-by pca``) and embedded
    as JSON into ``output_template``.

    Exits with status 1 after printing the help if no file is given.
    """
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option(
        '-d', '--distance', dest='distance', default=25, type='float',
        help="Draw links between elements that are within a certain distance from each other")
    parser.add_option(
        '-b', '--bp-distance', dest='bp_distance', default=16, type='int',
        help="Draw links only between nucleotides which are so many nucleotides apart")
    parser.add_option(
        '-s', '--sort-by', dest='sort_by', default='mcc', type='string',
        help="What to sort by (options: mcc, pca)")
    parser.add_option(
        '-n', '--names', dest='names', default=False, action='store_true',
        help='Add the name of the structure to the display')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs = []
    pair_bitmaps = []
    cgs = []
    all_links = []
    mccs = []
    cm = None
    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs.append(cg)
        # The correlation calculator is built once, from the first structure.
        # Test identity with None rather than the truthiness of the object.
        if cm is None:
            cm = ftme.AdjacencyCorrelation(cg)
        # The links of the first structure serve as the "correct" links for
        # all later structures.
        (links, pair_bitmap) = extract_extra_links(
            cg,
            options.distance,
            options.bp_distance,
            correct_links=all_links[0] if all_links else None)

        all_links.append(links)
        pair_bitmaps.append(pair_bitmap)

        mcc = ftme.mcc(cm.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {
            "sequence": cg.seq,
            "structure": cg.to_dotbracket_string(),
            "extraLinks": links
        }

        # NOTE(review): fud.pv receives expression strings that name the
        # locals `options`, `mcc` and `rmsd`; presumably it evaluates them in
        # this frame, so these names must not be changed.
        fud.pv('options.names')
        fud.pv('mcc, rmsd')
        if options.names:
            seq_struct['name'] = op.basename(
                filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        else:
            seq_struct['name'] = ''

        structs.append(seq_struct)
        mccs.append(mcc)

    if options.sort_by == 'pca':
        print("Sorting by pca", file=sys.stderr)
        ix = reorder_structs(pair_bitmaps)
    else:
        print("Sorting by mcc", file=sys.stderr)
        # Negate to obtain a descending order (highest MCC first).
        ix = np.argsort(-np.array(mccs))

    new_array = [structs[x] for x in ix]

    print(output_template.format(json.dumps(new_array)))
Exemplo n.º 15
0
    def update_statistics(self, energy_function, sm, prev_energy, tracking_energies = None, tracked_energies=None):
        '''
        Add a newly sampled structure to the set of statistics.

        Increments the step counter, computes the RMSD and MCC relative to the
        original structure (if available), keeps the `save_n_best` structures
        with the lowest energies, writes a status line (unless silent),
        updates the plots and periodically saves structures to disk.

        :param energy_function: The energy_function used to evaluate the structure.
                                Here it is used for the energy of the original
                                structure (first call only) and for the energy
                                without background.
        :param sm: The spatial model that was sampled.
        :param prev_energy: The evaluated (accepted) energy of the current step.
                            It is used as the energy of this step without
                            being re-evaluated.
        :param tracking_energies: The energy_functions which are calculated for
                                  statistics, but not used for sampling.
        :param tracked_energies: The energy values of the tracking_energies.
                                 If not given, the tracking energies are
                                 evaluated here for the output.
        :returns: None
        '''
        self.counter += 1

        # On the first call, evaluate the energy of the original structure once.
        if self.energy_orig is None:
            self.energy_orig = 0.
            try:
                self.sm_orig.bg.add_all_virtual_residues()
                self.energy_orig = energy_function.eval_energy(self.sm_orig)
            except KeyError:
                # most likely no native structure was provided
                pass

        energy = prev_energy
        #energy = energy_function.eval_energy(sm, background=True)
        # The energy without background is what gets stored with the structure.
        if energy_function.uses_background():
            energy_nobg = energy_function.eval_energy(sm, background=False)
        else:
            energy_nobg=energy

        mcc = None

        # RMSD and MCC are only computed if original coordinates exist and
        # RMSD calculation was not disabled; otherwise r stays 0 and mcc None.
        if self.centers_orig is not None:
            r = 0.
            if not self.no_rmsd:
                centers_new = ftug.bg_virtual_residues(sm.bg)
                r = cbr.centered_rmsd(self.centers_orig, centers_new)
                #r = cbr.drmsd(self.centers_orig, centers_new)
                cm = self.confusion_matrix_calculator.evaluate(sm.bg)
                mcc = ftme.mcc(cm)
        else:            
            # no original coordinates provided so we can't calculate rmsds
            r = 0.

        # NOTE: `dist` is never reassigned in this method, so the
        # "| dist" output further down is never produced.
        dist = None
        dist2 = None

        cg = sm.bg
        dists = []

        # NOTE: the loop targets are instance attributes, so self.dist1 and
        # self.dist2 hold the last pair of self.dists after the loop.
        for (self.dist1, self.dist2) in self.dists:
            node1 = cg.get_node_from_residue_num(self.dist1)
            node2 = cg.get_node_from_residue_num(self.dist2)

            pos1, len1 = cg.get_position_in_element(self.dist1)
            pos2, len2 = cg.get_position_in_element(self.dist2)

            #fud.pv('node1, node2, pos1, pos2')

            # Vector from the first to the second coordinate of each element.
            vec1 = cg.coords[node1][1] - cg.coords[node1][0]
            vec2 = cg.coords[node2][1] - cg.coords[node2][0]

            #mid1 = (cg.coords[node1][0] + cg.coords[node1][1]) / 2
            #mid2 = (cg.coords[node2][0] + cg.coords[node2][1]) / 2

            # Point along the element vector at fraction pos/len
            # (presumably the position of the residue within the element).
            mid1 = cg.coords[node1][0] + pos1 * (vec1 / len1)
            mid2 = cg.coords[node2][0] + pos2 * (vec2 / len2)
            
            #fud.pv('mid1, mid2')

            dists += [ftuv.vec_distance(mid1, mid2)]

        #self.energy_rmsd_structs += [(energy, r, sm.bg)]
        # Store a deep copy of the structure together with its energy
        # (without background) and RMSD.
        self.energy_rmsd_structs += [(energy_nobg, r, copy.deepcopy(sm.bg))]
        #self.energy_rmsd_structs += [(energy, r, sm.bg.copy())]

        # Keep only the save_n_best entries with the lowest energy.
        sorted_energies = sorted(self.energy_rmsd_structs, key=lambda x: x[0])
        self.energy_rmsd_structs = sorted_energies[:self.save_n_best]

        # Track the extreme RMSD values seen so far.
        if r > self.highest_rmsd:
            self.highest_rmsd = r

        if r < self.lowest_rmsd:
            self.lowest_rmsd = r

        # NOTE: the local lowest_rmsd is the RMSD of the lowest-energy
        # structure, not the lowest RMSD (that one is self.lowest_rmsd).
        lowest_energy = sorted_energies[0][0]
        lowest_rmsd = sorted_energies[0][1]

        # The following string literal is disabled code; it has no effect.
        '''
        if energy == lowest_energy:
            for key in sm.angle_defs:
                print >>sys.stderr, key, str(sm.angle_defs[key])
        '''

        if not self.silent:
            if self.verbose:
                # NOTE: the only body of this branch is a string literal
                # (disabled code), so the verbose flag has no effect here.
                '''
                for energy_func in energy_function.energies:
                    print energy_func.__class__.__name__, energy_func.eval_energy(sm)
                '''
            _, rog=fbe.length_and_rog(sm.bg)
            #output_str = u"native_energy [{:s} {:d}]: {:3d} {:5.03g} {:5.3f} ROG: {:5.3f} | min:
            output_str = u"native_energy [%s %d]: %3d %5.03g  %5.3f ROG: %5.3f | min: %5.2f (%5.2f) %5.2f | extreme_rmsds: %5.2f %5.2f (%.2f)" % ( sm.bg.name, sm.bg.seq_length, self.counter, energy, r , rog, lowest_energy, self.energy_orig, lowest_rmsd, self.lowest_rmsd, self.highest_rmsd, energy_nobg)
            output_str += " |"

            # assume that the energy function is a combined energy
            # NOTE: this inspects self.energy_function, not the
            # energy_function parameter.
            if isinstance(self.energy_function, fbe.CombinedEnergy):
                for e in self.energy_function.iterate_energies():
                    if isinstance(e,fbe.DistanceExponentialEnergy):
                        output_str += " [clamp {},{}: {:.1f}]".format(e.from_elem,
                                                                      e.to_elem,
                                                                      e.get_distance(sm))
            # Report the tracking energies: use the precomputed values if
            # they were passed in, otherwise evaluate each energy now.
            if tracked_energies and tracking_energies:
                output_str += " | [tracked Energies]"
                for i,e in enumerate(tracking_energies):
                    sn=e.shortname()
                    # Shorten long names to 9 characters plus "...".
                    if len(sn)>12:
                        sn=sn[:9]+"..."
                    output_str += "  [{}]: ".format(sn)
                    output_str += "%5.03g" % (tracked_energies[i])
            elif tracking_energies:
                output_str += " | [tracked Energies]"
                for e in tracking_energies:
                    sn=e.shortname()
                    if len(sn)>12:
                        sn=sn[:9]+"..."
                    output_str += "  [{}]: ".format(sn)
                    output_str += "%5.03g" % (e.eval_energy(sm))

            if dist:
                output_str += " | dist %.2f" % (dist)

            for dist2 in dists:
                if dist2 is not None:
                    output_str += " | [dist2: %.2f]" % (dist2)

            if mcc is not None:
                output_str += " | [mcc: %.3f]" % (mcc)

            output_str += " [time: %.1f]" % (time.time() - self.creation_time)

            #Print to both STDOUT and the log file.
            # If the output file is stdout itself, this first print is
            # skipped so that the line is not printed twice.
            if self.output_file != sys.stdout:
                print (output_str.strip())

            if self.output_file != None:
                print(output_str, file=self.output_file)
                self.output_file.flush()

        self.update_plots(energy, r)

        # The following string literal is disabled debugging code.
        '''
        if self.counter % 1000 == 0:
            import pdb; pdb.set_trace()
        '''

        # Every 10th step the best structures are saved (unless silent).
        if self.counter % 10 == 0:
            if not self.silent:
                self.save_top(self.save_n_best, counter=self.counter)

        # Every step_save-th step the current structure is written to the
        # sampling output directory.
        if self.step_save > 0 and self.counter % self.step_save == 0:
            #If a projection match energy was used, save the optimal projection direction to the file.
            if isinstance(self.energy_function, fbe.CombinedEnergy):
                for e in self.energy_function.iterate_energies():
                    if hasattr(e, "accepted_projDir"):
                        sm.bg.project_from=e.accepted_projDir
            sm.bg.to_cg_file(os.path.join(cbc.Configuration.sampling_output_dir, 'step%06d.coord' % (self.counter)))
Exemplo n.º 16
0
def main():
    """Convert coarse grain files into fornac HTML and print it to stdout.

    Every file given on the command line is loaded, its extra links are
    extracted and its MCC (relative to the first file) and RMSD (relative to
    the first file) are computed. The structures are then ordered either by
    descending MCC or by ``reorder_structs`` (``--sort-by pca``) and embedded
    as JSON into ``output_template``.

    Exits with status 1 after printing the help if no file is given.
    """
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option(
        '-d', '--distance', dest='distance', default=25, type='float',
        help="Draw links between elements that are within a certain distance from each other")
    parser.add_option(
        '-b', '--bp-distance', dest='bp_distance', default=16, type='int',
        help="Draw links only between nucleotides which are so many nucleotides apart")
    parser.add_option(
        '-s', '--sort-by', dest='sort_by', default='mcc', type='string',
        help="What to sort by (options: mcc, pca)")
    parser.add_option(
        '-n', '--names', dest='names', default=False, action='store_true',
        help='Add the name of the structure to the display')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs = []
    pair_bitmaps = []
    cgs = []
    all_links = []
    mccs = []
    cm = None
    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs.append(cg)
        # The correlation calculator is built once, from the first structure.
        # Test identity with None rather than the truthiness of the object.
        if cm is None:
            cm = ftme.AdjacencyCorrelation(cg)
        # The links of the first structure serve as the "correct" links for
        # all later structures.
        (links, pair_bitmap) = extract_extra_links(
            cg, options.distance, options.bp_distance,
            correct_links=all_links[0] if all_links else None)

        all_links.append(links)
        pair_bitmaps.append(pair_bitmap)

        mcc = ftme.mcc(cm.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {"sequence": cg.seq,
                      "structure": cg.to_dotbracket_string(),
                      "extraLinks": links}

        # NOTE(review): fud.pv receives expression strings that name the
        # locals `options`, `mcc` and `rmsd`; presumably it evaluates them in
        # this frame, so these names must not be changed.
        fud.pv('options.names')
        fud.pv('mcc, rmsd')
        if options.names:
            seq_struct['name'] = op.basename(filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        else:
            seq_struct['name'] = ''

        structs.append(seq_struct)
        mccs.append(mcc)

    # sys.stderr.write and single-argument print(...) behave the same under
    # Python 2 and Python 3, unlike the `print >>stream` statement.
    if options.sort_by == 'pca':
        sys.stderr.write("Sorting by pca\n")
        ix = reorder_structs(pair_bitmaps)
    else:
        sys.stderr.write("Sorting by mcc\n")
        # Negate to obtain a descending order (highest MCC first).
        ix = np.argsort(-np.array(mccs))

    new_array = [structs[x] for x in ix]

    print(output_template.format(json.dumps(new_array)))