Пример #1
0
def plot_rmsd(nucs, structure, cols):
    """Plot RMSD traces per chromosome for one or more files.

    Every file in ``nucs`` is opened read-only and ``rmsd(f, structure)`` is
    iterated; each ``(chromosome, positions, rmsds)`` item it yields becomes
    one line on that chromosome's subplot. Chromosomes are assigned to
    subplots in sorted order, filling a grid that is ``cols`` wide, with both
    axes shared between subplots.

    Args:
        nucs: Iterable of files/paths accepted by ``HDFFile``.
        structure: Structure key forwarded to ``rmsd``.
        cols: Number of subplot columns.
    """
    import matplotlib.pyplot as plt
    from .util import ceil_div
    from collections import defaultdict

    rmsdss = defaultdict(list)
    for nuc in nucs:
        with HDFFile(nuc, "r") as f:
            for chromosome, pos, rmsds in rmsd(f, structure):
                rmsdss[chromosome].append((pos, rmsds))

    # squeeze=False makes matplotlib always return a 2-D (rows, cols) array.
    # Without it the return type is a bare Axes for a 1x1 grid and a 1-D
    # array for a single row or column, which broke both the flattening
    # below and the "bottom row" lookup.
    fig, axs = plt.subplots(ceil_div(len(rmsdss), cols),
                            cols,
                            sharex=True,
                            sharey=True,
                            squeeze=False)
    # axs.flat walks the grid row by row, matching the sorted chromosomes.
    for ax, (chromosome, data) in zip(axs.flat, sorted(rmsdss.items())):
        for poss, rmsds in data:
            ax.plot(poss, rmsds)
        ax.set_ylabel('\n'.join(("RMSD", chromosome)))
    # The x axis is shared, so only the bottom row carries the label.
    for ax in axs[-1]:
        ax.set_xlabel("Genome Position (bp)")
    plt.show()
Пример #2
0
def plot_stats(nucs, structure, param, violation_padding):
    """Plot mean +/- std of each statistic against a calculation parameter.

    For every file in ``nucs`` the attribute ``param`` is read from
    ``structures/<structure>/calculation`` and each statistic ("scale" and
    "violations") is computed by calling the module-level function of the
    same name. One error-bar subplot is drawn per statistic, with the
    parameter value on the x axis.

    Args:
        nucs: Sequence of files/paths accepted by ``HDFFile`` (``len`` is
            taken, so it must be sized).
        structure: Structure key inside each file.
        param: Name of the calculation attribute used as x axis.
        violation_padding: Passed as ``padding`` to ``violations``.
    """
    import matplotlib.pyplot as plt

    stat_names = {"scale": {}, "violations": {'padding': violation_padding}}
    # One (n_files, 3) table per statistic: param value, mean, std.
    stats = defaultdict(lambda: np.empty((len(nucs), 3), dtype='float'))

    fig, axs = plt.subplots(len(stat_names), 1)

    for (i, nuc), (stat, kwargs) in cartesian(enumerate(nucs),
                                              stat_names.items()):
        with HDFFile(nuc, "r") as f:
            param_value = f['structures'][structure]['calculation'].attrs[
                param]
            # "particle_sizes" holds a sequence; use its last entry, as the
            # CSV `stats` command does. The check must be on the parameter
            # *name*, not on the value that was read.
            if param == "particle_sizes":
                param_value = param_value[-1]
            # Statistics are looked up by name among module-level functions.
            stat_values = globals()[stat](f, structure, **kwargs)
            stats[stat][i] = [
                param_value,
                np.mean(stat_values),
                np.std(stat_values)
            ]

    for ax, (stat_name, data) in zip(axs, stats.items()):
        ax.set_ylabel(stat_name)
        ax.set_xlabel(param)
        # Reorder whole rows by parameter value. np.sort(data, axis=0) would
        # sort every column independently and detach each mean/std from the
        # parameter value it belongs to.
        data = data[np.argsort(data[:, 0])]
        ax.errorbar(data[:, 0], data[:, 1], yerr=data[:, 2])
    fig.tight_layout()
    plt.show()
Пример #3
0
def linkage(nuc, structure):
    """Return the single-linkage clustering of the coordinates in ``nuc``.

    All datasets under ``structures/<structure>/coords`` are concatenated
    along axis 1, every entry along axis 0 is flattened into one row
    (presumably one row per model - confirm against the file layout), and
    the rows are clustered with ``hierarchy.linkage`` using the module's
    ``distance`` as the metric.

    Args:
        nuc: File/path accepted by ``HDFFile``; opened read-only.
        structure: Structure key inside the file.

    Returns:
        Whatever ``hierarchy.linkage`` returns for the flattened rows.
    """
    with HDFFile(nuc, "r") as f:
        datasets = list(f['structures'][structure]['coords'].values())
        merged = np.concatenate(datasets, axis=1)
        n_rows = merged.shape[0]
        flattened = merged.reshape(n_rows, -1)
        return hierarchy.linkage(flattened, method='single', metric=distance)
Пример #4
0
def align(nuc, target, structure, mirror=True):
    """Align the coordinates stored in ``nuc`` to a reference, in place.

    The coordinate datasets under ``structures/<structure>/coords`` are
    concatenated along axis 1. The reference is the median over axis 0 when
    ``target`` is ``'median'``, otherwise entry ``int(target)`` along axis 0.
    ``least_squares(ref, entry, mirror=mirror)`` is evaluated for every entry
    and the results are applied with ``@`` to the matching entry of each
    dataset, which is then overwritten in the file.

    Args:
        nuc: File/path accepted by ``HDFFile``; opened in ``"r+"`` mode.
        target: ``'median'`` or something ``int()`` can convert to an index.
        structure: Structure key inside the file.
        mirror: Forwarded to ``least_squares``.
    """
    with HDFFile(nuc, "r+") as f:
        coords_group = f['structures'][structure]['coords']
        stacked = np.concatenate(list(coords_group.values()), axis=1)

        if target == 'median':
            ref = np.median(stacked, axis=0)
        else:
            ref = stacked[int(target)]

        fit = partial(least_squares, ref, mirror=mirror)
        xforms = [fit(entry) for entry in stacked]

        # Write back dataset by dataset; the i-th transform goes with the
        # i-th entry of every dataset.
        for dataset in coords_group.values():
            transformed = [
                op.matmul(xform, entry)
                for xform, entry in zip(xforms, dataset)
            ]
            dataset[:] = np.array(transformed, dtype=dataset.dtype)
Пример #5
0
def output_rmsd(nucs, structure, position):
    """Print the largest RMSD across files for each shared position.

    Every file in ``nucs`` is opened read-only and ``rmsd(f, structure)`` is
    iterated to build a ``{(chromosome, position): rmsd}`` mapping per file.
    Only keys present in *every* file are reported. Each output line has the
    form ``"<chromosome>:<position> <max rmsd>"``.

    Args:
        nucs: Iterable of files/paths accepted by ``HDFFile``. When empty,
            nothing is printed.
        structure: Structure key forwarded to ``rmsd``.
        position: Optional iterable of ``(chromosome, position)`` pairs. When
            truthy, only those pairs are reported (in the given order, and
            only if shared by all files); otherwise all shared pairs are
            reported in sorted order.
    """
    nucs_rmsds = []
    for nuc in nucs:
        with HDFFile(nuc, "r") as f:
            nuc_rmsds = {}
            for chromo, positions, rmsds in rmsd(f, structure):
                nuc_rmsds.update(zip(zip(repeat(chromo), positions), rmsds))
            nucs_rmsds.append(nuc_rmsds)

    if not nucs_rmsds:
        # set.intersection() with no operands raises TypeError; with no
        # input files there is simply nothing to report.
        return

    conserved = set.intersection(*map(set, nucs_rmsds))
    if position:
        selected = [key for key in position if key in conserved]
    else:
        selected = sorted(conserved)

    for chromo, pos in selected:
        # `per_file` deliberately avoids reusing the name of the module-level
        # `rmsd` function called above.
        worst = max(per_file[chromo, pos] for per_file in nucs_rmsds)
        print("{}:{} {}".format(chromo, pos, worst))
Пример #6
0
def test_rmsd_cli(tmpdir):
    """Check the output of the ``rmsd`` CLI command on generated files.

    Two files are written under ``tmpdir`` from the module-level ``nucs``
    fixtures (coordinates and particle positions of structure ``'0'``), then
    the command is invoked and its exit code and exact output are asserted.

    NOTE(review): only the path of the last file written is passed to the
    command - confirm whether both files were meant to be passed.
    """
    from nuc_analyze.main import cli

    files = [tmpdir.join(name) for name in ("test1.nuc", "test2.nuc")]

    for path, nuc in zip(files, nucs):
        with HDFFile(path, 'w') as f:
            structure = nuc['structures']['0']
            for chromo, coords in sorted(structure['coords'].items()):
                dataset_name = 'structures/0/coords/{}'.format(chromo)
                f.create_dataset(dataset_name, data=coords)
            for chromo, particle in structure['particles'].items():
                dataset_name = (
                    'structures/0/particles/{}/positions'.format(chromo))
                f.create_dataset(dataset_name, data=particle['positions'])

    expected_lines = ["1:10 0.0", "1:200 0.5", "X:100 2.0"]
    expected_output = '\n'.join(expected_lines) + '\n'

    # `path` is still bound to the last file from the loop above.
    result = CliRunner().invoke(cli, ["rmsd", str(path)])
    assert result.exit_code == 0
    assert result.output == expected_output
Пример #7
0
def test_csv(tmpdir):
    """Check the CSV header and first row written by the ``stats`` command.

    A file is built under ``tmpdir`` from the module-level ``nuc`` fixture
    (coordinates, flattened restraints and calculation attributes of
    structure ``'0'``). The command is run with ``--param foo`` and the
    first two output lines are compared with the expected header and row.
    """
    from nuc_analyze.main import cli
    from nuc_analyze.stats import flatten_dict
    from math import sqrt

    p = tmpdir.join("test.nuc")

    with HDFFile(p, 'w') as f:
        structure = nuc['structures']['0']

        for chromo, coords in structure['coords'].items():
            f.create_dataset('structures/0/coords/{}'.format(chromo),
                             data=coords)

        for (chr_a, chr_b), restraints in flatten_dict(
                structure['restraints']).items():
            dataset_name = 'structures/0/restraints/{}/{}'.format(
                chr_a, chr_b)
            f.create_dataset(dataset_name, data=restraints)

        calculation = f.create_group('structures/0/calculation')
        for attr, v in structure['calculation']['attrs'].items():
            calculation.attrs[attr] = v

    scales = [0.0, sqrt(2) / 2]
    violations = [1, 4]
    expected_row = ['test.nuc']
    for values in (scales, violations):
        expected_row.append(np.mean(values))
        expected_row.append(np.std(values))
    expected_row.append(1)

    expected_header = (
        'filename,scale_mean,scale_std,violations_mean,violations_std,foo')

    result = CliRunner().invoke(cli, ["stats", str(p), "--param", "foo"])
    assert result.exit_code == 0

    lines = iter(result.output.splitlines())
    assert next(lines) == expected_header
    assert next(lines) == ','.join(str(value) for value in expected_row)
Пример #8
0
def stats(nucs, structure, param, violation_padding):
    """Write per-file statistics to standard output as CSV.

    The header is ``filename``, then ``<stat>_mean`` / ``<stat>_std`` for
    each statistic in sorted name order ("scale", "violations"), then the
    requested parameter names. For every file one row is written with the
    file's ``name``, the requested attributes of
    ``structures/<structure>/calculation`` and the mean and standard
    deviation of each statistic. A ``particle_sizes`` attribute is reduced
    to its last element.

    Args:
        nucs: Iterable of objects accepted by ``HDFFile`` that also expose a
            ``name`` attribute.
        structure: Structure key inside each file.
        param: Iterable of calculation attribute names to include.
        violation_padding: Passed as ``padding`` to ``violations``.
    """
    import csv
    from sys import stdout

    stat_names = {"scale": {}, "violations": {'padding': violation_padding}}
    ordered_stats = sorted(stat_names.items())

    fieldnames = ["filename"]
    for name in sorted(stat_names):
        fieldnames.append("{}_mean".format(name))
        fieldnames.append("{}_std".format(name))
    fieldnames = fieldnames + list(param)

    writer = csv.DictWriter(stdout, fieldnames)
    writer.writeheader()

    for nuc in nucs:
        with HDFFile(nuc, "r") as f:
            attrs = f['structures'][structure]['calculation'].attrs
            row = {}
            for key in param:
                row[key] = attrs[key]
            if "particle_sizes" in row:
                row["particle_sizes"] = row["particle_sizes"][-1]
            row["filename"] = str(nuc.name)

            for stat, kwargs in ordered_stats:
                # Statistics are looked up by name among module-level
                # functions.
                compute = globals()[stat]
                stat_values = compute(f, structure, **kwargs)
                row["{}_mean".format(stat)] = np.mean(stat_values)
                row["{}_std".format(stat)] = np.std(stat_values)

            writer.writerow(row)