Exemplo n.º 1
0
def tsne(df, out_csv: Path):
    """Compute (or reload cached) 2D t-SNE coordinates and save a scatter plot.

    Coordinates are cached in `out_csv` (TSV, gzip); the figure is written
    next to it with a .png suffix.
    """
    log.info(relpath(out_csv))

    if not out_csv.is_file():
        # Fresh embedding: samples are the columns of `df`.
        assert (df.index.name == "gene_name")
        coords = TSNE(random_state=seed).fit_transform(df.T)
        X = pd.DataFrame(
            data=coords,
            index=pd.Series(df.columns, name="sample_name"),
            columns=['x', 'y'],
        )
        X.to_csv(out_csv, sep='\t', compression="gzip")
    else:
        # Reuse the cached embedding and sanity-check its layout.
        X = pd.read_csv(out_csv, sep='\t', compression="infer", index_col=0)
        assert (X.index.name == "sample_name")
        assert (list(X.columns) == ['x', 'y'])

    # https://matplotlib.org/tutorials/introductory/customizing.html
    style = {
        'legend.fontsize': "xx-small",
        'legend.framealpha': 0.5,
    }

    with Plox(style) as px:
        # Shrink the markers as the number of points grows.
        marker_size = 10 / np.log10(len(X))
        px.a.plot(X.x, X.y, '.', ms=marker_size)
        px.a.axis('off')
        px.f.savefig(out_csv.with_suffix(".png"))
Exemplo n.º 2
0
def make_scenarios(df: pd.DataFrame):
    """Write one MATLAB script per scenario column of `df` into `out_dir`."""
    # Index right after the first column whose name begins with "scenario".
    first_scenario = min(i for (i, c) in enumerate(df.columns) if c.lower().startswith("scenario"))
    scenarios_start = 1 + first_scenario
    log.info(f"Scenarios start at column #{scenarios_start}.")

    for column in df.columns[scenarios_start:]:
        if not column:
            continue
        with (out_dir / f"{column}.m").open(mode='w') as fd:
            with contextlib.redirect_stdout(fd):
                print_scenario(df, column)
Exemplo n.º 3
0
def main():
    """Render per-run, per-species steady-state plots; write an HTML overview.

    For each run and each species spec, plots the matching species and saves
    the figure under `out_dir/<run>/`.  An index.html of clickable thumbnails
    is written at the end.
    """
    from data_source import runs

    # Each spec selects species whose name contains '+', optionally
    # excluding those whose name contains '-'.
    sp_specs = [
        {'+': "CAS", '-': "ΔCAS"},
        {'+': "CAS·Ran·GTP", '-': "ΔCAS·Ran·GTP"},
        {'+': "ImpA·CAS·Ran·GTP"},
        {'+': "ΔCAS·Ran·GTP"},
        {'+': "ImpA·ΔCAS·Ran·GTP"},
        {'+': "ΔCAS"},
        {'+': "ImpB"},
        {'+': "ImpA"},
        {'+': "ImpA·ImpB"},
        {'+': "Ran·GTP"},
        {'+': "NLS"},
    ]

    # `Not a species` placeholder
    # (used as the exclusion pattern when a spec has no '-' key, so that
    # the `not in` test below never matches).
    nas = ("?" * 100)

    # Maps plot name x run id -> image file path (feeds the HTML overview).
    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows()):
        for sp_spec in sp_specs:
            # Species to include in the plot
            spp = [c for c in run.tx.columns if (sp_spec['+'] in c) and not (sp_spec.get('-', nas) in c)]

            # File name and proto-ylabel
            name = sp_spec['+'] + (f" (excl. {sp_spec['-']})" if ('-' in sp_spec) else "")

            for px in plot_total_steadystate(run, spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ", r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label)

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview

    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            print(
                summary.applymap(
                    # Make the image paths relative to the output folder.
                    lambda p: os.path.relpath(p, out_dir)
                ).applymap(
                    # Each cell becomes a clickable thumbnail.
                    lambda p: f'<a href="{p}"><img style="width:196px" src="{p}"/></a>'
                ).to_html(
                    escape=False
                )
            )
Exemplo n.º 4
0
def main():
    """Save one PNG per truthy RanGAP group of the 'results_fig4a' runs."""
    from data_source import runs

    fig4a_runs = runs['results_fig4a']

    for (RanGAP, group) in fig4a_runs.groupby('RanGAP'):
        if not RanGAP:
            continue
        # One output file per RanGAP level.
        filepath = out_dir / f"RanGAP={RanGAP}.png"
        for px in process(group):
            log.info(f"Writing {relpath(filepath)} .")
            px.f.savefig(filepath)
Exemplo n.º 5
0
def print_scenario(df, c):
    """Print a MATLAB (SimBiology-style) script applying scenario column `c` of `df`.

    Rows with an empty or "default" value are skipped.  Output goes to
    stdout; callers redirect it into a .m file.
    """
    log.info(f"Making scenario: {c}.")

    print(f'% Autogenerated by {relpath(__file__)} on {Now()}.')
    print(f'')

    # NOTE: `df[c]` (the scenario values) is evaluated when `zip` is built,
    # so rebinding `c` to the row's Compartment below is safe, if confusing.
    for (i, c, n, p, u, v) in zip(df.Item, df.Compartment, df.Name, df.Parameter, df.Units, df[c]):
        if (v.lower() in ["", "default"]):
            continue

        log.info(f"Processing item: `{i}`.")

        if (i == ""):
            pass
        elif (i == "Reaction"):
            # Set a kinetic-law parameter of the named reaction.
            print(
                *[
                    f'r = m.Reactions({{m.Reactions.Name}} == "{n}");',
                    f'k = r.KineticLaw;',
                    f'p = k.Parameters({{k.Parameters.Name}} == "{p}");',
                    f'assert(1 == length(p));',
                    f'assert(p.Units == "{u}");',
                    f'p.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Compartment"):
            # Set the value of the named compartment.
            print(
                *[
                    f'c = m.Compartments({{m.Compartments.Name}} == "{n}");',
                    f'assert(1 == length(c));',
                    f'assert(c.Units == "{u}");',
                    f'c.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Species"):
            # Set the value of the named species within its compartment
            # (here `c` is the row's Compartment, per the zip above).
            assert (p == "Value")
            assert (c != "")  # Compartment
            print(
                *[
                    f'c = [m.Species.Parent];',
                    f's = m.Species(({{m.Species.Name}} == "{n}") & ({{c.Name}} == "{c}"));',
                    f'assert(1 == length(s));',
                    f'assert(s.Units == "{u}");',
                    f's.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        else:
            log.warning(f"Unknown item: `{i}`.")
Exemplo n.º 6
0
def main():
    """Render per-run, per-spec steady-state plots; write an HTML overview.

    Species are selected by regex (`sp_specs` maps display name -> pattern).
    Figures go to `out_dir/<run>/`; an index.html of thumbnails is written
    at the end.
    """
    from data_source import runs

    # Maps plot name x run id -> image file path (feeds the HTML overview).
    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows()):
        for (sp_display, sp_pattern) in sp_specs.items():
            # Species to include in the plot
            collect_spp = [
                candidate for candidate in run.tx.columns
                if re.match(sp_pattern, candidate)
            ]

            if collect_spp:
                log.info(f"Species for spec `{sp_display}`: {collect_spp}.")
            else:
                log.warning(f"No species selected for spec `{sp_display}`.")

            # File name and proto-ylabel
            name = sp_display

            for px in plot_total_steadystate(run, collect_spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ",
                                      r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label, fontdict={'fontsize': 20})

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview

    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            # Paths are made relative to out_dir; each cell becomes a
            # clickable thumbnail of width IMG_WIDTH.
            print(
                summary.applymap(lambda p: os.path.relpath(p, out_dir)).
                applymap(
                    lambda p:
                    f'<a href="{p}"><img style="width:{IMG_WIDTH}px" src="{p}"/></a>'
                ).to_html(escape=False))
Exemplo n.º 7
0
def download_expr():
    """Download the expression data and log the download's metadata."""
    log.info("Downloading the expr data.")
    log.info(download(URLS['expr']).now.meta)
Exemplo n.º 8
0
def download_meta():
    """Download the metadata and write small "dummy" subsets for testing.

    Side effects: writes dummy_meta.csv and the matching dummy_data.csv
    (first 50 rows x 101 columns of the expr data) into
    `download.local_folder`.
    """
    log.info("Downloading the meta data.")
    with download(URLS['meta']).now.open() as rd:
        df_meta = pd.read_csv(rd, sep=',', index_col=0)

    # print(json.dumps(Counter(df_meta.subclass_label), indent=2))
    # NOTE(review): `summary` is never read below -- it records the observed
    # subclass_label counts (output of the commented print above) for reference.
    summary = {
        "NaN": 4014,
        "L2/3 IT ENTl": 5764,
        "L2 IT RHP": 7599,
        "L2/3 IT PPP": 34084,
        "L2 IT ENTl": 4068,
        "L4/5 IT CTX": 253722,
        "L5 PT CTX": 16783,
        "L5 IT TPE-ENT": 5525,
        "L2/3 IT CTX-1": 117565,
        "L3 IT ENT": 13789,
        "L3 RSP-ACA": 4214,
        "L2/3 IT CTX-2": 7141,
        "L6 IT CTX": 79403,
        "L5 PPP": 1240,
        "L6 IT ENTl": 1169,
        "L5 IT CTX": 44889,
        "L5 NP CTX": 29378,
        "L6 CT CTX": 135241,
        "L6b CTX": 13114,
        "L6b/CT ENT": 20789,
        "NP SUB": 1949,
        "NP PPP": 2695,
        "V3d": 66,
        "Meis2": 1,
        "Lamp5": 38464,
        "Vip": 41626,
        "Sncg": 11573,
        "Sst": 42310,
        "Pvalb": 31088,
        "Sst Chodl": 1906,
        "DG": 58754,
        "CA1-ProS": 16141,
        "Car3": 21538,
        "SUB-ProS": 4406,
        "CT SUB": 6012,
        "CA2": 336,
        "CA3": 1899,
        "CR": 268,
        "Oligo": 7685,
        "Astro": 3119,
        "SMC-Peri": 198,
        "Endo": 746,
        "VLMC": 129,
        "Micro-PVM": 636
    }

    log.info("Making the dummy set.")
    # Only a small corner of the expression table is needed for the dummy set.
    with closing(urllib.request.urlopen(url=URLS['expr'])) as rd:
        df_data = pd.read_csv(rd, sep=',', index_col=0, nrows=50).iloc[:,
                                                                       0:101]
    # Align the metadata with the dummy data's rows before writing both.
    df_meta = df_meta.loc[df_data.index]
    df_meta.to_csv(download.local_folder / "dummy_meta.csv", sep=',')
    df_data.to_csv(download.local_folder / "dummy_data.csv", sep=',')
    log.info("Dummy set done.")

    log.info("Run b_reduced.py to download the reduced dataset.")
Exemplo n.º 9
0
            data=data.x,
            dtype=float,
        ).loc[data.t.squeeze() >= data.t_react.squeeze()]
    )

    load_params = First(str).then(loadmat).then(pd.Series).then(
        lambda data: data[["ImpB", "RanBP1", "RanGAP", "t_react"]].transform(lambda x: np.array(x).squeeze()),
    )

    data = pd.DataFrame({
        file.stem: pd.Series({
            'tx': load_tx(file),
            **load_params(file).to_dict()
        })
        for file in folder.glob("*.mat")
    })

    return data.T


# Load every "results*" sibling folder (of this file's parent's parent)
# into a dict of run tables, keyed by folder name.
runs = {
    folder.name: load_runs(folder)
    for folder in Path(__file__).parent.parent.glob("results*")
    if folder.is_dir()
}

log.info(f"Loaded runs from folders: {', '.join(runs)} .")

if __name__ == '__main__':
    print(runs)
Exemplo n.º 10
0
 def peek(x):
     """Log `x` and return it unchanged (handy inside pipelines)."""
     log.info(x)
     return x
Exemplo n.º 11
0
def main():
    """Download the metadata and expression data and write reduced copies.

    Writes (dummy_)meta.csv.gz and (dummy_)data.csv.gz into a folder named
    after this file.  In DUMMY_MODE, local dummy files replace the remote
    sources and a small random sample of cells is kept.
    """
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x):
        # Log and pass through (for use inside pipelines).
        log.info(x)
        return x

    def meta_open_remote():
        # Open the metadata source: local dummy file or remote download.
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")),
                        mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        # Open the expression source: local dummy file or remote URL stream.
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")),
                        mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Make a reduced metadata file
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(
                f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        # Sanity check on the expected number of metadata columns.
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(
            f"Based on the metadata, there are {nsamples_total} in total.")

        # Subset df_meta to samples of interest

        if PARAM.DUMMY_MODE:
            # Small fixed random sample for fast test runs.
            ix = df_meta.sample(12, random_state=5, replace=False).index
        else:
            ix = df_meta.index[df_meta.subclass_label.isin(
                PARAM.subclass_of_interest)]

        df_meta = df_meta[df_meta.index.isin(ix)]
        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

    # Make a reduced expression data file
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(
                f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 1024

        # Ceil-division: one extra chunk for any remainder.
        nchunks_expected = (nsamples_total // chunksize) + bool(
            (nsamples_total % chunksize))

        log.info(
            f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks."
        )
        log.info(f"Downloading.")

        # Stream the (large) expression table in chunks, keeping only rows
        # (samples) that survived the metadata reduction above.
        df_data = pd.concat(
            axis=0,
            objs=[
                chunk[chunk.index.isin(df_meta.index)]
                for chunk in progressbar(pd.read_csv(
                    rd, sep=PARAM.remote_sep, index_col=0,
                    chunksize=chunksize),
                                         max_value=nchunks_expected)
                if any(chunk.index.isin(df_meta.index))
            ])

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(
            f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}."
        )
        log.info(f"Finished {relpath(data_file)}")
Exemplo n.º 12
0
def load_runs(folder) -> pd.DataFrame:
    """Collect all *.mat files in `folder` into one DataFrame, one row per file.

    Each row holds a 'tx' time-course table plus the file's remaining
    parameters; the row label is the file stem.
    """

    def to_frame(mat: pd.Series) -> pd.DataFrame:
        # Time course: time index 't' vs one column per species name.
        return pd.DataFrame(
            index=pd.Series(mat.t.squeeze(), name='t', dtype=float),
            columns=pd.Series(mat.names.squeeze(), name='species').transform(unlist1),
            data=mat.x,
            dtype=float,
        )

    load_tx = First(str).then(loadmat).then(pd.Series).then(to_frame)
    load_params = First(str).then(loadmat).then(pd.Series)

    per_file = {}
    for file in folder.glob("*.mat"):
        record = {'tx': load_tx(file), **load_params(file).to_dict()}
        per_file[file.stem] = pd.Series(record)

    return pd.DataFrame(per_file).T


# Load the run tables from `base` once at import time.
runs = load_runs(base)

log.info(f"Loaded runs: {', '.join(runs.index)}")

if __name__ == '__main__':
    pass
Exemplo n.º 13
0
 def peek(x, text=None):
     """Log `text` if provided, otherwise `x`; return `x` unchanged."""
     log.info(x if text is None else text)
     return x
Exemplo n.º 14
0
def main():
    """Download the metadata and the full expression table (sparse, chunked).

    Writes (dummy_)meta.csv.gz and (dummy_)data.csv.gz into a folder named
    after this file.  In DUMMY_MODE, local dummy files replace the remote
    sources.
    """
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x, text=None):
        # Log `text` if given (else `x`) and pass `x` through.
        if text is None:
            log.info(x)
        else:
            log.info(text)
        return x

    def meta_open_remote():
        # Open the metadata source: local dummy file or remote download.
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        # Open the expression source: local dummy file or remote URL stream.
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Metadata
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        # Sanity check on the expected number of metadata columns.
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(f"Based on metadata, there are {nsamples_total} samples.")

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

        # Free the table before the large download below.
        del df_meta

    # Collect expression
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 128

        # Ceil-division: one extra chunk for any remainder.
        nchunks_expected = (nsamples_total // chunksize) + bool((nsamples_total % chunksize))

        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")
        log.info(f"Downloading.")

        # Stream in chunks; store as a sparse int dtype to keep memory down.
        df_data = pd.concat(axis=0, objs=(
            chunk.astype(pd.SparseDtype('int', fill_value=0))
            for chunk in progressbar(
            pd.read_csv(
                rd, sep=PARAM.remote_sep, index_col=0, chunksize=chunksize
            ),
            max_value=nchunks_expected
        )
        ))

        log.info(f"Sparse density: {df_data.sparse.density}")

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Data has {len(df_data.columns)} samples.")
        log.info(f"Finished {relpath(data_file)}")
Exemplo n.º 15
0
# The .mat results file must exist before we attempt to parse it.
assert file.is_file()

from scipy.io import loadmat

# Raw MATLAB workspace as a name -> value Series.
data = pd.Series(loadmat(str(file)))
# print(data)

# Time course: index 't' (time), one column per species name.
X = pd.DataFrame(
    index=pd.Series(data.t.squeeze(), name='t', dtype=float),
    columns=pd.Series(data.names.squeeze(), name='species').transform(unlist1),
    data=data.x,
    dtype=float,
)

# Restrict to the time window of interest.
X = X[(1e-1 <= X.index) & (X.index <= 1e3)]
log.info(f"Effective k_d: {X['k_d_eff'].median()}")

# Keep only the species to be plotted.
X = X[["IBB", "IBB'", "ImpB", "ImpB'", "IBB·ImpB", "IBB*·ImpB", "IBB·ImpB'"]]

# Per-species line styling (color / dash / width).
kw = {
    "IBB": dict(color='C3', ls='-', lw=3),
    "IBB'": dict(color='C3', ls='--', lw=3),
    "ImpB": dict(color='C0', ls='-', lw=3),
    "ImpB'": dict(color='C0', ls='--', lw=3),
    "IBB·ImpB": dict(color='C1', ls='-', lw=2),
    "IBB*·ImpB": dict(color='C4', ls='-', lw=3),
    "IBB·ImpB'": dict(color='C4', ls='--', lw=3),
}

style = {
    rcParam.Font.size: 12,
Exemplo n.º 16
0
def main():
    """Reduce the dataset to the cell subclasses of interest.

    Writes meta.csv.gz (reduced metadata) and data.csv.gz (genes x samples
    expression for the retained samples) into a folder named after this file.
    """
    from a_download import df_meta

    out_dir = mkdir(Path(__file__).with_suffix(''))
    data_file = out_dir / "data.csv.gz"
    meta_file = out_dir / "meta.csv.gz"

    # Keep only the cell subclasses of interest.
    # (This step was previously wrapped in a dead `if True:` guard.)
    df_meta = df_meta[df_meta.subclass_label.isin(
        PARAM.subclass_of_interest)]
    log.info(
        f"New subset of cells: {dict(df_meta.subclass_label.value_counts())}"
    )

    df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

    log.info(f"Size of reduced dataset: {len(df_meta)}.")
    log.info(f"Finished {relpath(meta_file)}")

    with download(URLS['expr']).now.open(mode='r') as fd:
        log.info(f"Reducing the expression data.")

        # Stream the (large) expression table in chunks, keeping only rows
        # (samples) present in the reduced metadata.
        df_data = pd.concat(
            axis=0,
            objs=(df[df.index.isin(df_meta.index)] for df in pd.read_csv(
                fd, sep=PARAM.remote_sep, index_col=0, chunksize=1024)
                  if any(df.index.isin(df_meta.index))))

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(
            f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}."
        )
        log.info(f"Finished {relpath(data_file)}")