Exemplo n.º 1
0
def time_range(sample, start, end):
    """Highlight one donor's samples collected inside an inclusive day window.

    Builds a 0/1 indicator over ``X.index``: 1 when the sample's
    ``HOST_SUBJECT_ID`` equals ``"2202:Donor<sample>"`` and its
    ``COLLECTION_DAY`` is one of ``start .. end`` (both inclusive), else 0.
    The indicator is used as a numerical, per-sample color for
    ``graph.show`` and a title describing the selection is set.

    Relies on the module-level names ``metadata``, ``X``, ``graph``,
    ``Color`` and ``title``.

    :param sample: donor identifier interpolated into ``"2202:Donor%s"``.
    :param start: first collection day of the window (inclusive).
    :param end: last collection day of the window (inclusive).
    """
    # Both values are loop-invariant: build them once instead of once per
    # sample. Membership in a range gives the same answers as membership in
    # the equivalent list.
    subject_id = "2202:Donor%s" % sample
    wanted_days = range(start, end + 1)

    target_vals = [
        1 if metadata.loc[sample_id, "HOST_SUBJECT_ID"] == subject_id
        and metadata.loc[sample_id, "COLLECTION_DAY"] in wanted_days
        else 0 for sample_id in X.index
    ]
    color = Color(target=target_vals, dtype="numerical", target_by="sample")
    graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)
    title("Subject %s at %s to %s" % (sample, start, end))
Exemplo n.º 2
0
def main(args):
    """Read the graph at ``args.graph`` and write its plot to ``args.output``.

    When ``args.metadata`` is given, it is parsed with ``data_parser``; if
    ``args.column`` is also set, a per-sample ``Color`` is built from that
    column using ``args.dtype``. Without a column a message is logged and no
    color is passed on. ``args.complex`` switches off the simple rendering.
    """
    graph = Graph().read(args.graph)

    color = None
    if args.metadata:
        metadata = data_parser(args.metadata)
        column = args.column
        if column:
            color = Color(metadata.loc[:, column],
                          dtype=args.dtype,
                          target_by='sample')
        else:
            logger("No column assign, it won't assign any color.", verbose=1)

    vis_progressX(graph,
                  mode='file',
                  simple=not args.complex,
                  color=color,
                  filename=args.output,
                  auto_open=False)
Exemplo n.º 3
0
              overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

## Step 6. Run the SAFE test for every feature.

# Other features that can be inspected: 'Faecalibacterium', 'Prevotella'
target_feature = 'Bacteroides'
n_iter = 1000
enriched_scores = SAFE_batch(graph, metadata=X, n_iter=n_iter, _mode='enrich')
target_safe_score = enriched_scores.loc[:, target_feature]

## Step 7. Visualization

# Display options shared by both plots below.
show_options = dict(fig_size=(10, 10), node_size=15, notshow=True)

# Color by samples: the raw values of the target feature.
sample_values = X.loc[:, target_feature]
color = Color(target=sample_values, dtype="numerical", target_by="sample")
graph.show(color=color, **show_options)

# Color by nodes: the SAFE scores of the target feature.
color = Color(target=target_safe_score, dtype="numerical", target_by="node")
graph.show(color=color, **show_options)

safe_summary = get_SAFE_summary(
    graph=graph,
    metadata=X,
    safe_scores=enriched_scores,
    n_iter=n_iter,
    p_value=0.01,
)
Exemplo n.º 4
0
 def show_samples(self, samples, **kwargs):
     """Plot the graph with the nodes returned for ``samples`` marked.

     Each node in ``self.nodes`` is flagged 1 when it is among
     ``self.sample2nodes(samples)`` and 0 otherwise; the flags are used as a
     categorical, per-node color. Extra keyword arguments are forwarded to
     ``show``.
     """
     hit_nodes = self.sample2nodes(samples)
     flags = []
     for node_id in self.nodes:
         if node_id in hit_nodes:
             flags.append(1)
         else:
             flags.append(0)
     node_color = Color(flags, target_by='node', dtype='categorical')
     show(self, mode=None, color=node_color, **kwargs)
Exemplo n.º 5
0
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import datasets
from sklearn.cluster import DBSCAN
from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.plot import Color

iris = datasets.load_iris()
X, y = iris.data, iris.target

# Step 1. Create the Mapper.
tm = mapper.Mapper(verbose=1)

# Step 2. Project the data with an MDS filter (components 0 and 1).
mds_filter = Filter.MDS(components=[0, 1], random_state=100)
lens = [mds_filter]
projected_X = tm.filter(X, lens=lens)

# Step 3. Build the cover on the min-max scaled projection, then map the
# standardized data with DBSCAN as the clusterer.
clusterer = DBSCAN(eps=0.75, min_samples=1)
scaled_projection = MinMaxScaler().fit_transform(projected_X)
cover = Cover(projected_data=scaled_projection, resolution=20, overlap=0.75)
standardized_X = StandardScaler().fit_transform(X)
graph = tm.map(data=standardized_X, cover=cover, clusterer=clusterer)
print(graph.info())

# Step 4. Visualization: color by the iris class labels.
show_options = dict(fig_size=(10, 10), node_size=15, notshow=True)
color = Color(target=y, dtype="categorical")
graph.show(color=color, **show_options)
Exemplo n.º 6
0
def draw_stratification(graph,
                        SAFE_dict,
                        cols,
                        output,
                        mode='html',
                        n_iter=1000,
                        p_val=0.05,
                        width=1000,
                        height=1000,
                        allnodes=False):
    """Draw an enterotyping-like stratification map based on SAFE scores.

    The edges of ``graph`` are drawn as grey lines, nodes are overlaid as one
    scatter trace per feature, and the figure is handed to
    ``output_fig(fig, output, mode)``.

    :param graph: object exposing ``nodePos``, ``size``, ``nodes`` and
        ``edges``.
    :param SAFE_dict: SAFE scores, convertible with
        ``pd.DataFrame.from_dict`` (rows: nodes, columns: features).
    :param cols: feature names to draw. When empty or ``None``, every feature
        that is the top-scoring and significant feature of at least 10 nodes
        is drawn instead.
    :param output: destination passed to ``output_fig``.
    :param mode: output mode passed to ``output_fig``.
    :param n_iter: iteration count; ``1 / (n_iter + 1)`` is taken as the
        smallest attainable p-value when normalising the threshold.
    :param p_val: significance cut-off, converted into a SAFE score
        threshold.
    :param width: figure width.
    :param height: figure height.
    :param allnodes: with ``cols``, draw each feature over all nodes through
        ``vis_progressX`` instead of drawing only its enriched nodes.
    """
    node_pos = graph.nodePos
    nodes = graph.nodes
    node_sizes = graph.size
    sizes = np.array([node_sizes[_]
                      for _ in range(len(nodes))]).reshape(-1, 1)
    transformed_sizes = MinMaxScaler(
        feature_range=(10, 40)).fit_transform(sizes).ravel()

    # One (start, end, None) triple per edge; the None separates consecutive
    # segments so that all edges can live in a single line trace.
    xs = []
    ys = []
    for edge in graph.edges:
        xs += [node_pos[edge[0], 0], node_pos[edge[1], 0], None]
        ys += [node_pos[edge[0], 1], node_pos[edge[1], 1], None]
    fig = plotly.tools.make_subplots(1, 1)

    node_line = go.Scatter(
        # ordination line
        visible=True,
        x=xs,
        y=ys,
        marker=dict(color="#8E9DA2", opacity=0.7),
        line=dict(width=1),
        showlegend=False,
        hoverinfo='skip',
        mode="lines")
    fig.append_trace(node_line, 1, 1)

    safe_score_df = pd.DataFrame.from_dict(
        SAFE_dict)  # row: nodes, columns: features
    min_p_value = 1.0 / (n_iter + 1.0)
    SAFE_pvalue = np.log10(p_val) / np.log10(min_p_value)

    # For every node keep the feature with the highest SAFE score, or NaN
    # when even that score is below the significance threshold.
    top_feature_idx = np.argmax(safe_score_df.values, axis=1)
    tmp = [
        safe_score_df.columns[col_i]
        if safe_score_df.iloc[row_i, col_i] >= SAFE_pvalue else np.nan
        for row_i, col_i in enumerate(top_feature_idx)
    ]
    # Number of nodes for which each feature is the top significant one.
    t = Counter(tmp)

    if cols:
        missing = [fea for fea in cols if fea not in safe_score_df.columns]
        if missing:
            logger(
                "There are provided cols \" %s\"doesn't at SAFE summary table."
                % ';'.join(missing),
                verbose=1)
        for fea in cols:
            if fea in missing:
                # Already reported above; looking it up would raise KeyError.
                continue
            if allnodes:
                color = Color(SAFE_dict[fea],
                              target_by='node',
                              dtype='numerical')
                subfig = vis_progressX(graph,
                                       simple=True,
                                       mode='obj',
                                       color=color)
                subfig.data[1]['name'] = fea
                fig.append_trace(subfig.data[1], 1, 1)
            else:
                # Per-node boolean mask of the nodes enriched for this
                # feature. Assumes the rows of safe_score_df are in the same
                # order as the rows of node_pos -- TODO confirm.
                enriched_mask = (safe_score_df.loc[:, fea] >=
                                 SAFE_pvalue).values
                if not enriched_mask.any():
                    # if all False....
                    logger(
                        "fea: %s get all False bool indicated there are not enriched nodes showed at the graph"
                        % fea,
                        verbose=1)
                else:
                    # NOTE(review): this branch uses the raw node sizes while
                    # the default branch uses transformed_sizes -- confirm
                    # which one is intended.
                    node_position = go.Scatter(
                        # node position
                        visible=True,
                        x=node_pos[enriched_mask, 0],
                        y=node_pos[enriched_mask, 1],
                        hoverinfo="text",
                        marker=dict(  # color=node_colors,
                            size=list(sizes[enriched_mask, 0]),
                            opacity=0.9),
                        showlegend=True,
                        name=str(fea) + ' (%s)' % str(t.get(fea, 0)),
                        mode="markers")
                    fig.append_trace(node_position, 1, 1)
    else:
        # Built once: the per-node labels do not change between features.
        node_labels = np.array(tmp)
        frequent_features = [
            fea for fea, count in sorted(t.items(), key=lambda x: x[1])
            if count >= 10 and not pd.isnull(fea)  # NaN marks "no feature"
        ]
        for fea in frequent_features:
            # safe higher than threshold, just centroids
            fea_mask = node_labels == fea
            node_position = go.Scatter(
                # node position
                visible=True,
                x=node_pos[fea_mask, 0],
                y=node_pos[fea_mask, 1],
                hoverinfo="text",
                marker=dict(  # color=node_colors,
                    size=list(transformed_sizes[fea_mask]),
                    opacity=0.9),
                showlegend=True,
                name=str(fea) + ' (%s)' % str(t[fea]),
                mode="markers")
            fig.append_trace(node_position, 1, 1)

    fig.layout.width = width
    fig.layout.height = height
    fig.layout.font.size = 15
    fig.layout.hovermode = 'closest'

    output_fig(fig, output, mode)
    logger("Stratification graph has been output to", output, verbose=1)
Exemplo n.º 7
0
# Step3. Covering, clustering & mapping
# Build the graph from the projected data with a DBSCAN clusterer.
clusterer = DBSCAN(eps=0.1, min_samples=5)
cover = Cover(projected_data=projected_X,
              resolution=20,
              overlap=0.1)
graph = tm.map(data=X,
               cover=cover,
               clusterer=clusterer)
############################################################
# One-hot encode the metadata, append y as the 'circle' column, and derive a
# second table through graph.transform_sn(type='s2n').
# NOTE(review): 's2n' presumably means sample-to-node aggregation -- confirm.
metadata = pd.get_dummies(metadata)
metadata.loc[:, 'circle'] = y
node_metadata = graph.transform_sn(metadata, type='s2n')
############################################################
# NOTE(review): color1 passes the transformed table with target_by='sample';
# it is never used below -- confirm whether this mismatch is intentional.
color1 = Color(target=node_metadata.iloc[:, 0],
               dtype='numerical',
               target_by='sample')
# color1.get_colors(graph.nodes)


# color2 takes the first column of the transformed table as per-node values,
# color3 takes the first column of the original table as per-sample values.
color2 = Color(target=node_metadata.iloc[:, 0],
               dtype='numerical',
               target_by='node')
color3 = Color(target=metadata.iloc[:, 0],
               dtype='numerical',
               target_by='sample')
# Both routes must yield the same [1][1] element of get_colors(graph.nodes).
assert np.all(color3.get_colors(graph.nodes)[1][1] == color2.get_colors(graph.nodes)[1][1])

color4 = Color(target=node_metadata.iloc[:, 1],
               dtype='categorical',
               target_by='node')  # wrong example: Color should not be used this way
Exemplo n.º 8
0
# Project the precomputed distance matrix with an MDS filter.
metric = Metric(metric="precomputed")
mds_filter = Filter.MDS(components=[0, 1], metric=metric, random_state=100)
lens = [mds_filter]
projected_X = tm.filter(dm, lens=lens)

# Step 4. Covering, clustering & mapping
eps = optimize_dbscan_eps(X, threshold=99)
clusterer = DBSCAN(eps=eps, min_samples=3)
scaled_projection = MinMaxScaler().fit_transform(projected_X)
cover = Cover(projected_data=scaled_projection, resolution=50, overlap=0.85)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

# Each metadata column is shown with the listed dtype, in this order.
# 'HOST_SUBJECT_ID' is drawn twice: once as categorical, once as numerical.
color_plan = (
    ('COLLECTION_DAY', "numerical"),
    ('HOST_SUBJECT_ID', "categorical"),
    ('HOST_SUBJECT_ID', "numerical"),
)
for target_feature, color_dtype in color_plan:
    color = Color(target=metadata.loc[:, target_feature],
                  dtype=color_dtype,
                  target_by="sample")
    graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)

Exemplo n.º 9
0
# Project the distance matrix with an MDS filter (components 0 and 1).
mds_filter = Filter.MDS(components=[0, 1], metric=metric, random_state=100)
lens = [mds_filter]
projected_X = tm.filter(dm, lens=lens)

# Step 4. Covering, clustering & mapping
eps = optimize_dbscan_eps(X, threshold=99)
clusterer = DBSCAN(eps=eps, min_samples=3)
scaled_projection = MinMaxScaler().fit_transform(projected_X)
cover = Cover(projected_data=scaled_projection, resolution=35, overlap=0.9)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

# Color the samples by their collection day.
target_feature = 'COLLECTION_DAY'
day_values = metadata.loc[:, target_feature]
color = Color(target=day_values, dtype="numerical", target_by="sample")
show_options = dict(fig_size=(10, 10),
                    node_size=15,
                    strength=0.03,
                    notshow=True)
graph.show(color=color, **show_options)

# Color the samples by the values of one feature column of X.
target_feature = 'Bacteroides'
feature_values = X.loc[:, target_feature]
color = Color(target=feature_values, dtype="numerical", target_by="sample")
graph.show(color=color,
           fig_size=(10, 10),
           node_size=15,
           strength=0.03,