def get_graph():
    g = nx.MultiDiGraph()
    g.add_nodes_from(species.keys())
    nx.set_node_attributes(g, species, name="matlab")

    # Flux and how to get its value
    g.add_edge('Ran·GTP (n)', 'Ran·GTP (c)', matlab="FluxRanGTP")
    g.add_edge('Ran·GDP (n)', 'Ran·GDP (c)', matlab="FluxRanGDP")

    g.add_edge('Ran·GTP (c)', 'Ran·GDP (c)', matlab="GAP")
    g.add_edge('RanBP1·Ran·GTP (c)', 'Ran·GDP (c)', matlab="GAP_RanBP1")
    g.add_edge('Ran·GTP (c)', 'RanBP1·Ran·GTP (c)', matlab="F RanGTP--RanBP1")

    # g.add_edge('RanBP1·Ran·GTP (c)', 'RanBP1 (c)', matlab="GAP_RanBP1")
    # g.add_edge('RanBP1 (c)', 'RanBP1·Ran·GTP (c)', matlab="F RanGTP--RanBP1")

    g.add_edge('Impβ·Ran·GTP (c)', 'Ran·GDP (c)', matlab="ImpB GAP")
    g.add_edge('Impβ·Ran·GTP (c)', 'Free Impβ (c)', matlab="ImpB GAP")

    g.add_edge('Free Impβ (n)', 'Free Impβ (c)', matlab="F ImpB")

    g.add_edge('Ran·GDP (n)', 'Ran·GTP (n)', matlab="Nuc RanGDP to RanGTP conversion")

    g.add_edge('Impβ·Ran·GTP (n)', 'Impβ·Ran·GTP (c)', matlab="F ImpB--RanGTP")

    g.add_edge('Impβ·Ran·GTP (n)', 'Free Impβ (n)', matlab="R nuc")
    g.add_edge('Impβ·Ran·GTP (n)', 'Ran·GTP (n)', matlab="R nuc")

    g.add_edge('Impβ·Ran·GTP (c)', 'Free Impβ (c)', matlab="R cyto")
    g.add_edge('Impβ·Ran·GTP (c)', 'Ran·GTP (c)', matlab="R cyto")

    g.add_edge('Free cargo (n)', 'Free cargo (c)', matlab="F Cargo")
    g.add_edge('Cargo·Impβ (n)', 'Cargo·Impβ (c)', matlab="F ImpB--Cargo")

    g.add_edge('Cargo·Impβ (n)', 'Free Impβ (n)', matlab="C nuc")
    g.add_edge('Cargo·Impβ (n)', 'Free cargo (n)', matlab="C nuc")

    g.add_edge('Cargo·Impβ (c)', 'Free Impβ (c)', matlab="C cyto")
    g.add_edge('Cargo·Impβ (c)', 'Free cargo (c)', matlab="C cyto")

    g.add_edge('Cargo·Impβ (n)', 'Free cargo (n)', matlab="Cargo knock-off nuc")
    g.add_edge('Cargo·Impβ (n)', 'Impβ·Ran·GTP (n)', matlab="Cargo knock-off nuc")
    g.add_edge('Ran·GTP (n)', 'Impβ·Ran·GTP (n)', matlab="Cargo knock-off nuc")

    g.add_edge('Cargo·Impβ (c)', 'Free cargo (c)', matlab="Cargo knock-off cyto")
    g.add_edge('Cargo·Impβ (c)', 'Impβ·Ran·GTP (c)', matlab="Cargo knock-off cyto")
    g.add_edge('Ran·GTP (c)', 'Impβ·Ran·GTP (c)', matlab="Cargo knock-off cyto")

    if not set(g.nodes).issubset(species):
        log.warning(f"Don't know species: {set(g.nodes) - set(species)}.")

    return g
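
# Hedged usage sketch, not part of the original script: `_demo_list_fluxes`
# is a hypothetical helper showing how the "matlab" edge attribute maps each
# edge of the flux graph to the MATLAB quantity that carries its value.
def _demo_list_fluxes():
    g = get_graph()
    for (u, v, matlab) in g.edges(data="matlab"):
        print(f"{u} -> {v}: {matlab}")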
def if_sheets_fails(retry_state: RetryCallState):
    assert isinstance(retry_state.outcome, Future)
    log.warning(f"Fetching sheet failed with exception `{retry_state.outcome.exception()}`.")
    log.warning("LOADING FROM DISK.")
    df = pd.read_table(unlist1(out_dir.glob("*.tsv")), dtype=str, na_filter=None)
    return df
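
# Hedged sketch (an assumption, not shown in the original): a callback with
# this signature is typically wired in via tenacity's `retry_error_callback`,
# which passes the final RetryCallState and returns the callback's result
# instead of re-raising. `fetch_sheet` and `SHEET_URL` are hypothetical names.
#
#   @retry(stop=stop_after_attempt(3), retry_error_callback=if_sheets_fails)
#   def fetch_sheet() -> pd.DataFrame:
#       return pd.read_table(SHEET_URL, dtype=str, na_filter=None)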
def print_scenario(df, scenario):
    # The scenario column name (`scenario`) and the per-row compartment are
    # kept in separate variables (the original reused `c` for both).
    log.info(f"Making scenario: {scenario}.")

    print(f'% Autogenerated by {relpath(__file__)} on {Now()}.')
    print()

    for (i, compartment, n, p, u, v) in zip(df.Item, df.Compartment, df.Name, df.Parameter, df.Units, df[scenario]):
        if v.lower() in ["", "default"]:
            continue

        log.info(f"Processing item: `{i}`.")

        if i == "":
            pass
        elif i == "Reaction":
            print(
                *[
                    f'r = m.Reactions({{m.Reactions.Name}} == "{n}");',
                    f'k = r.KineticLaw;',
                    f'p = k.Parameters({{k.Parameters.Name}} == "{p}");',
                    f'assert(1 == length(p));',
                    f'assert(p.Units == "{u}");',
                    f'p.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif i == "Compartment":
            print(
                *[
                    f'c = m.Compartments({{m.Compartments.Name}} == "{n}");',
                    f'assert(1 == length(c));',
                    f'assert(c.Units == "{u}");',
                    f'c.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif i == "Species":
            assert (p == "Value")
            assert (compartment != "")
            print(
                *[
                    f'c = [m.Species.Parent];',
                    f's = m.Species(({{m.Species.Name}} == "{n}") & ({{c.Name}} == "{compartment}"));',
                    f'assert(1 == length(s));',
                    f'assert(s.Units == "{u}");',
                    f's.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        else:
            log.warning(f"Unknown item: `{i}`.")
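
# Hedged usage sketch (toy data, not the real sheet): `print_scenario`
# expects columns Item, Compartment, Name, Parameter, Units, plus one
# column per scenario, and prints MATLAB code that patches a SimBiology
# model `m` accordingly.
def _demo_print_scenario():
    df = pd.DataFrame({
        'Item': ["Reaction"], 'Compartment': [""], 'Name': ["GAP"],
        'Parameter': ["k"], 'Units': ["1/second"], 'demo': ["0.2"],
    })
    print_scenario(df, 'demo')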
def plot_total_timecourse(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=3, alpha=0.5),
            '(n)': dict(ls="-.", lw=3, alpha=0.9),
            'NPC': dict(ls="-", lw=3, alpha=0.8),
        }

        # Dummy artists so that each suffix appears in the legend
        for s in fmt:
            px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = f"C{0}"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in {"(c)", "(n)", "NPC"}
        }

        if len(spp) != sum(map(len, spp_by_suffix.values())):
            log.warning(f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}")

        # `time` x `species` table of concentrations
        tx: pd.DataFrame = run.tx

        for (suffix, group) in spp_by_suffix.items():
            # Species at the NPC are rescaled to a concentration
            f = NPC_CONCENTRATION_FACTOR if (suffix == 'NPC') else 1
            x = tx[group].sum(axis=1) * f
            px.a.plot(tx.index / 3600, x, **fmt[suffix], color=color)

        px.a.set_yticks([y for y in px.a.get_yticks() if (y >= 0)])
        px.a.set_yticklabels([f"{y:.2g}" for y in px.a.get_yticks()])

        px.a.set_xlabel("Time, h")
        px.a.set_xscale('log')
        px.a.legend(loc="upper left")

        yield px
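
# Hedged usage sketch (assumes `run.tx` is the time-by-species table used
# above; the file name is hypothetical):
#
#   for px in plot_total_timecourse(run, list(run.tx.columns)):
#       px.f.savefig("total_timecourse.png")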
def main():
    from data_source import runs

    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows()):
        for (sp_display, sp_pattern) in sp_specs.items():
            # Species to include in the plot
            collect_spp = [
                candidate
                for candidate in run.tx.columns
                if re.match(sp_pattern, candidate)
            ]

            if collect_spp:
                log.info(f"Species for spec `{sp_display}`: {collect_spp}.")
            else:
                log.warning(f"No species selected for spec `{sp_display}`.")

            # File name and proto-ylabel
            name = sp_display

            for px in plot_total_steadystate(run, collect_spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ", r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label, fontdict={'fontsize': 20})

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview
    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            print(
                summary.applymap(lambda p: os.path.relpath(p, out_dir)).applymap(
                    lambda p: f'<a href="{p}"><img style="width:{IMG_WIDTH}px" src="{p}"/></a>'
                ).to_html(escape=False)
            )
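
# Hedged sketch of the overview mechanism (toy markup): each cell of
# `summary` holds an image path, and `to_html(escape=False)` keeps the
# generated <a>/<img> markup intact instead of HTML-escaping it.
def _demo_escape_false():
    demo = pd.DataFrame({'run1': ['<a href="a.png">a</a>']}, index=['species'])
    print(demo.to_html(escape=False))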
def plot_total_timecourse(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=2, alpha=0.5),
            '(n)': dict(ls="-.", lw=2, alpha=0.9),
            'NPC': dict(ls="-", lw=2, alpha=0.8),
        }

        # Dummy artists so that each suffix appears in the legend
        for s in fmt:
            px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = f"C{0}"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in {"(c)", "(n)", "NPC"}
        }

        if sum(map(len, spp_by_suffix.values())) != len(spp):
            log.warning(f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}")

        # `time` x `species` table of concentrations
        tx: pd.DataFrame = run.tx

        for (suffix, group) in spp_by_suffix.items():
            x = tx[group].sum(axis=1)
            px.a.plot(tx.index / 3600, x, **fmt[suffix], color=color)

        px.a.set_xlabel("Time, h")
        px.a.set_xscale('log')
        px.a.legend(fontsize=10)

        yield px
def plot_total_steadystate(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=2, alpha=0.5),
            '(n)': dict(ls="-.", lw=2, alpha=0.9),
            'NPC': dict(ls="-", lw=2, alpha=0.8),
        }

        # for s in fmt:
        #     px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = f"C{0}"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in ["(c)", "NPC", "(n)"]  # order for display
        }

        if sum(map(len, spp_by_suffix.values())) != len(spp):
            log.warning(f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}")

        # `time` x `species` table of concentrations
        tx: pd.DataFrame = run.tx

        agg_by_suffix = pd.DataFrame(data={
            suffix: tx[group].sum(axis=1)
            for (suffix, group) in spp_by_suffix.items()
        })

        # Initial and final state only
        x01: pd.DataFrame = agg_by_suffix.iloc[[0, -1]]

        # Make heatmap
        cmap = mcolors.LinearSegmentedColormap.from_list('concentration', ["white", "darkblue"])
        # vmax = 10 ** np.ceil(np.log10(x01.max().max()))
        # vmax = x01.values.sum().sum()
        # Note: indexing with the dict itself (as in the original) is invalid;
        # use the list of suffix columns.
        vmax = x01.loc[:, list(spp_by_suffix)].sum(axis=1).max()

        # Sanity fix
        vmax = (vmax if not np.isclose(vmax, 0) else 1)

        im = px.a.imshow(x01, cmap=cmap, vmin=0, vmax=vmax, origin="upper", aspect="auto")

        assert (2 == len(x01.index)), "Expect initial and final state in rows."
        px.a.set_yticks(np.arange(0, len(x01.index)))
        px.a.set_yticklabels(["Initial", "Final"])

        px.a.set_xticks(np.arange(0, len(x01.columns)))
        px.a.set_xticklabels(x01.columns)

        # Annotate each cell with its value
        for i in range(x01.shape[0]):
            for j in range(x01.shape[1]):
                alignment = dict(ha="center", va="center")
                im.axes.text(j, i, "{:.3g}".format(x01.iloc[i, j]), fontsize=17, color="red", **alignment)

        # (xlim, ylim) = (px.a.get_xlim(), px.a.get_ylim())

        yield px
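
# Hedged, self-contained sketch of the annotated-heatmap pattern used above
# (toy data; same vmin/vmax convention as in `plot_total_steadystate`).
def _demo_annotated_heatmap():
    import matplotlib.pyplot as plt
    x = np.array([[0.1, 2.0], [1.5, 0.0]])
    (f, a) = plt.subplots()
    a.imshow(x, vmin=0, vmax=x.sum(axis=1).max())
    for (i, j) in np.ndindex(x.shape):
        a.text(j, i, f"{x[i, j]:.3g}", ha="center", va="center", color="red")
    f.savefig("demo_heatmap.png")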
from sklearn.metrics.pairwise import cosine_similarity

out_dir = mkdir(Path(__file__).with_suffix(''))

try:
    # Search the parent directories for the dataset folder
    dataset_name = "CasteloBranco-2018"
    cb_dir = str(next((q.resolve() for p in Path.cwd().parents for q in p.glob(f"**/{dataset_name}")), None))
    (cb_dir in sys.path) or sys.path.append(cb_dir)
    from z_sources import df_expr as df_expr_cb, df_meta as df_meta_cb
except ImportError:
    log.warning("Import from z_sources failed.")
    raise

try:
    ab_dir = next((q.resolve() for p in Path.cwd().parents for q in p.glob("**/Mouse-WCH-2020")), None)
    df_expr_ab = pd.read_table(unlist1(ab_dir.glob("*fewer_cells/*data*")), index_col=0)
    df_meta_ab = pd.read_table(unlist1(ab_dir.glob("*fewer_cells/*meta*")), index_col=0)
    df_meta_ab = df_meta_ab.reindex(df_expr_ab.columns)
except Exception:
    raise

# Only keep the genes in common and sort consistently
(df_expr_ab, df_expr_cb) = df_expr_ab.align(df_expr_cb.T, join="inner", axis=0)
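
# Hedged sketch of the row-alignment step (toy frames, not the real data):
# `align(join="inner", axis=0)` keeps only the shared index (genes here)
# and returns both frames in the same row order.
def _demo_align():
    a = pd.DataFrame({'s1': [1, 2]}, index=['g1', 'g2'])
    b = pd.DataFrame({'s2': [3, 4]}, index=['g2', 'g3'])
    (a2, b2) = a.align(b, join="inner", axis=0)
    assert list(a2.index) == list(b2.index) == ['g2']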
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x):
        log.info(x)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Make a reduced metadata file
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(f"Based on the metadata, there are {nsamples_total} samples in total.")

        # Subset df_meta to samples of interest
        if PARAM.DUMMY_MODE:
            ix = df_meta.sample(12, random_state=5, replace=False).index
        else:
            ix = df_meta.index[df_meta.subclass_label.isin(PARAM.subclass_of_interest)]

        df_meta = df_meta[df_meta.index.isin(ix)]

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')
        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

    # Make a reduced expression data file
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        chunksize = 24 if PARAM.DUMMY_MODE else 1024
        nchunks_expected = (nsamples_total // chunksize) + bool(nsamples_total % chunksize)
        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")

        log.info("Downloading.")

        # Stream the file in chunks, keeping only the samples of interest
        df_data = pd.concat(
            axis=0,
            objs=[
                chunk[chunk.index.isin(df_meta.index)]
                for chunk in progressbar(
                    pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0, chunksize=chunksize),
                    max_value=nchunks_expected,
                )
                if any(chunk.index.isin(df_meta.index))
            ],
        )

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')
        log.info(f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}.")
        log.info(f"Finished {relpath(data_file)}")
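
# Hedged, self-contained sketch of the chunked-filter pattern above
# ("big.csv" is hypothetical): rows are filtered per chunk, so the full
# table never has to fit in memory.
#
#   keep = {'sample_1', 'sample_7'}
#   df = pd.concat(
#       chunk[chunk.index.isin(keep)]
#       for chunk in pd.read_csv("big.csv", index_col=0, chunksize=1000)
#   )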
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x, text=None):
        log.info(x if (text is None) else text)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Metadata
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(f"Based on metadata, there are {nsamples_total} samples.")

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')
        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

        del df_meta

    # Collect expression
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        chunksize = 24 if PARAM.DUMMY_MODE else 128
        nchunks_expected = (nsamples_total // chunksize) + bool(nsamples_total % chunksize)
        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")

        log.info("Downloading.")

        # Stream in chunks and store sparsely to keep memory in check
        df_data = pd.concat(
            axis=0,
            objs=(
                chunk.astype(pd.SparseDtype('int', fill_value=0))
                for chunk in progressbar(
                    pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0, chunksize=chunksize),
                    max_value=nchunks_expected,
                )
            ),
        )

        log.info(f"Sparse density: {df_data.sparse.density}")

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')
        log.info(f"Data has {len(df_data.columns)} samples.")
        log.info(f"Finished {relpath(data_file)}")
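
# Hedged sketch of the sparse storage trick used above (toy data): casting
# to a SparseDtype keeps only the non-fill values in memory.
def _demo_sparse_density():
    dense = pd.DataFrame({'a': [0, 0, 3], 'b': [0, 1, 0]})
    sparse = dense.astype(pd.SparseDtype('int', fill_value=0))
    print(sparse.sparse.density)  # fraction of stored (non-zero) entries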
def show(g: nx.MultiDiGraph, state: pd.Series):
    # pos = nx.shell_layout(g)
    # pos = nx.planar_layout(g)
    # pos = nx.spring_layout(g)

    # Hand-tuned layout: cytoplasm on top, nucleus below
    pos = {
        'Free cargo (c)': [-6, +8],
        'Cargo·Impβ (c)': [+6, +8],
        'Impβ·Ran·GTP (c)': [-4, +6],
        'Free Impβ (c)': [+4, +6],
        'Ran·GTP (c)': [-2, +4],
        'Ran·GDP (c)': [+2, +4],
        'RanBP1·Ran·GTP (c)': [0, 3],
        # 'RanBP1 (c)': [0, 1],
        'Ran·GTP (n)': [-2, -2],
        'Ran·GDP (n)': [+2, -2],
        'Impβ·Ran·GTP (n)': [-4, -4],
        'Free Impβ (n)': [+4, -4],
        'Free cargo (n)': [-6, -6],
        'Cargo·Impβ (n)': [+6, -6],
    }
    # pos = nx.get_node_attributes(g, name='pos')

    species = pd.Series(nx.get_node_attributes(g, name='matlab'))
    # species = pd.DataFrame({'matlab': species, 'value': species.map(state)})
    species = species.map(state)

    if any(species.isna()):
        log.warning(f"Species have no data: \n{list(species[species.isna()].index)}")

    # Directed multifluxes
    fluxes = pd.Series(nx.get_edge_attributes(g, name='matlab'))
    fluxes = pd.DataFrame({'matlab': fluxes, 'value': fluxes.map(state)})

    # Directed fluxes
    f = fluxes.reset_index().groupby(by=['level_0', 'level_1']).value.sum().to_dict()

    # One directed edge per pair - version of the graph
    ug = (lambda ug: nx.DiGraph(ug).edge_subgraph(ug.edges))(nx.Graph(g))

    # Combined (net) fluxes
    fluxes = pd.Series(index=list(ug.edges), data=list(ug.edges)).transform(
        lambda e: (f.get(e, 0) - f.get((e[1], e[0]), 0))
    )

    if any(fluxes.isna()):
        log.warning(f"Fluxes have no data: \n{list(fluxes[fluxes.isna()].index)}")

    # log.info(f"Species: \n{species}")
    # log.info(f"Fluxes: \n{fluxes}")

    node_size = species.fillna(0)
    # node_size = node_size[node_size > 0]
    # node_size = node_size / node_size.sum()
    node_size = 300 * node_size

    node_labels = pd.Series(data=species.index, index=species.index)
    node_labels = node_labels.transform(lambda s: s.replace("(c)", "").replace("(n)", "").strip())

    edge_width = fluxes
    edge_width = edge_width[edge_width != 0]
    (fwd, bwd) = (edge_width[edge_width >= 0].index, edge_width[edge_width < 0].index)
    edge_width = edge_width.transform(lambda x: np.log10(np.abs(x)))
    edge_width = 0.4 + (edge_width - edge_width.min()) / ((edge_width.max() - edge_width.min()) or 1)

    edge_alpha = 0.7 * (edge_width / edge_width.max())
    edge_labels = fluxes.abs().transform(lambda x: f"{x:0.02g}")

    style = {
        rc.Figure.frameon: False,
    }

    with Plox(style) as px:
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.4)
        nx.draw_networkx_nodes(**kw, nodelist=node_size.index, node_size=node_size, node_color="C0", linewidths=0)

        kw = dict(G=ug, pos=pos, ax=px.a, edge_color='g')
        nx.draw_networkx_edges(**kw, width=edge_width[fwd], alpha=edge_alpha[fwd], edgelist=fwd)
        # Negative net flux: draw the edge in the reverse direction
        # (the original comprehension shadowed the graph variable `ug` here)
        nx.draw_networkx_edges(**kw, width=edge_width[bwd], alpha=edge_alpha[bwd], edgelist=[(v, u) for (u, v) in bwd])

        kw = dict(G=ug, pos=pos, ax=px.a, alpha=0.7)
        nx.draw_networkx_edge_labels(**kw, edge_labels=edge_labels.to_dict(), font_size=5, font_color='g')

        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.8, font_color='k')
        nx.draw_networkx_labels(**kw, font_size=7, labels=node_labels, verticalalignment="bottom")
        nx.draw_networkx_labels(**kw, font_size=6, labels=species.transform(lambda x: f"{x:0.02g}"), verticalalignment="top")

        px.a.axis('off')

        # Divider between cytoplasm and nucleus
        y = np.mean(px.a.get_ylim())
        px.a.plot(px.a.get_xlim(), [y, y], '--', color='k', lw=1, zorder=-100)

        kw = dict(x=(min(px.a.get_xlim()) + 0.8), ha="center", zorder=100, alpha=0.5, fontdict=dict(fontsize=6))
        px.a.text(**kw, y=(y + 0.1), s="Cytoplasm", va="bottom")
        px.a.text(**kw, y=(y - 0.1), s="Nucleus", va="top")

        out_dir = mkdir(Path(__file__).with_suffix(''))
        px.f.savefig(out_dir / "onion.png")
        px.f.savefig(out_dir / "onion.pdf")
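
# Hedged, self-contained sketch of the net-flux step in `show` (toy
# numbers): per-pair fluxes in opposite directions cancel.
def _demo_net_flux():
    f = {('A', 'B'): 3.0, ('B', 'A'): 1.0}
    net = {e: f.get(e, 0) - f.get((e[1], e[0]), 0) for e in [('A', 'B')]}
    assert net == {('A', 'B'): 2.0}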