Пример #1
0
def atlas_averages_annotationOnly(new_metadata, new_counttable, **kwargs):
    """Classify new cells against atlas averages and return an annotation table.

    A ``northstar.Averages`` model is built from the keyword options and
    fitted on ``new_counttable``. The resulting membership is written into
    ``new_metadata`` (columns ``new_class`` and ``new_class_renamed``), so
    the caller's frame is modified in place.

    Args:
        new_metadata: DataFrame whose index contains the column labels of
            ``new_counttable``.
        new_counttable: Table passed to ``northstar.Averages.fit``; its
            columns label the new samples (presumably cells - confirm
            against the caller).
        **kwargs: Must provide ``n_pcs``, ``atlas``, ``CT_lut``,
            ``weights_atlas_cells``, ``n_features_per_cell_type``,
            ``n_features_overdispersed``, ``n_neighbors``,
            ``n_neighbors_out_of_atlas``, ``threshold_neighborhood`` and
            ``resolution_parameter``. ``CT_lut`` maps raw class labels to
            display names.

    Returns:
        DataFrame indexed by the atlas cell types followed by the columns of
        ``new_counttable``, with columns ``new_membership`` (display name)
        and ``class`` (1-based integer code of that name).
    """
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # Gather the classifier settings, then build and fit the model.
    model_options = dict(
        atlas=atlas,
        n_cells_per_type=kwargs['weights_atlas_cells'],
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        n_neighbors_out_of_atlas=kwargs['n_neighbors_out_of_atlas'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )
    model = northstar.Averages(**model_options)
    model.fit(new_counttable)

    n_atlas_types = len(model.cell_types)
    new_cells = new_counttable.columns

    # One row per atlas cell type, followed by one row per new sample.
    all_rows = list(model.cell_types) + list(new_cells)
    annotdf = pd.DataFrame(index=all_rows, columns=['new_membership', 'class'])

    def _display_name(label):
        # Known labels are looked up in the LUT; purely numeric labels are
        # renumbered relative to the number of atlas types; anything else
        # is passed through unchanged.
        if label in cell_type_names:
            return cell_type_names[label]
        if label.isdigit():
            return 'NewClass_' + "{0:0=2d}".format(
                int(label) - n_atlas_types + 1)
        return label

    new_metadata.loc[new_cells, 'new_class'] = model.membership
    new_metadata['new_class_renamed'] = [
        _display_name(label) for label in new_metadata['new_class']
    ]

    # Fill in the display names: new samples first, then the atlas rows.
    annotdf.loc[new_cells, 'new_membership'] = new_metadata.loc[
        new_cells, 'new_class_renamed']
    atlas_rows = annotdf[:n_atlas_types].index
    annotdf.loc[atlas_rows, 'new_membership'] = (
        annotdf.index[:n_atlas_types].map(cell_type_names))

    # Sorted atlas names come first, then sorted names not in the atlas.
    atlastypes = list(np.sort(annotdf.loc[atlas_rows, 'new_membership']))
    novel_names = set(new_metadata['new_class_renamed']) - set(atlastypes)
    celltypes = atlastypes + list(np.sort(list(novel_names)))

    # 1-based integer code for every display name.
    class_lut = {name: code for code, name in enumerate(celltypes, start=1)}
    annotdf['class'] = annotdf['new_membership'].map(class_lut)

    return annotdf
Пример #2
0
            'class': 'subsample',
            'n_atlas': ns.n_atlas,
        }
        ress.append(resd)

    print('Run northstar (averages)')
    for aname, atlas in atlases.items():
        if not aname.endswith('avg'):
            continue
        print('Atlas: {:}'.format(aname))
        t0 = time.time()
        ns = northstar.Averages(
            atlas,
            n_features_per_cell_type=50,
            n_features_overdispersed=500,
            resolution_parameter=0.005,
            n_neighbors=30,
            #n_neighbors_external=5,
            #external_neighbors_mutual=True,
        )
        ns.fit(adata_tgt)
        t1 = time.time()
        t = t1 - t0

        ct_orig = ns.new_data.obs['CellType'].astype(str)
        gof = (ct_orig == ns.membership).mean()
        identity = ct_orig.to_frame()
        identity['northstar_assignment'] = ns.membership

        vs = ns.embed('umap')
Пример #3
0
def atlas_averages_to_tsnedf(new_metadata, new_counttable, **kwargs):
    """Classify new cells against atlas averages and build a t-SNE table.

    Steps performed:

    1. Fit ``northstar.Averages`` on ``new_counttable`` and store the
       membership in ``new_metadata`` (columns ``new_class`` and
       ``new_class_renamed``); the caller's frame is modified in place.
    2. Run ``unweighted_PCA`` on the feature-selected matrix and
       ``perform_tSNE`` on its result.
    3. Write the keyword arguments as JSON and the feature-selected matrix
       as CSV into the directory ``savedir + timestamp``.

    Args:
        new_metadata: DataFrame whose index contains the column labels of
            ``new_counttable``.
        new_counttable: Table passed to ``northstar.Averages.fit``; its
            columns label the new samples (presumably cells - confirm
            against the caller).
        **kwargs: Must provide ``savedir``, ``timestamp``, ``n_pcs``,
            ``atlas``, ``CT_lut``, ``weights_atlas_cells``,
            ``n_features_per_cell_type``, ``n_features_overdispersed``,
            ``n_neighbors``, ``n_neighbors_out_of_atlas``,
            ``threshold_neighborhood`` and ``resolution_parameter``.
            ``atlas`` is concatenated into the JSON file name, so it must be
            a string here, and the whole mapping must be JSON-serialisable.

    Returns:
        Tuple ``(tsnedf, celltypes, distance_matrix)``: the t-SNE frame with
        columns ``Dim1``, ``Dim2``, ``new_membership`` and ``class``; the
        ordered list of display names (sorted atlas names, then sorted names
        not in the atlas); and the second value returned by
        ``unweighted_PCA``.

    Raises:
        KeyError: If a required keyword argument is missing.
        OSError: If the output directory or files cannot be created.
    """
    savedir = kwargs['savedir']
    date = kwargs['timestamp']
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # Instantiate and fit the classifier.
    sa = northstar.Averages(
        atlas=atlas,
        n_cells_per_type=kwargs['weights_atlas_cells'],
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        n_neighbors_out_of_atlas=kwargs['n_neighbors_out_of_atlas'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )
    sa.fit(new_counttable)

    idx = new_counttable.columns
    n_fixed = len(sa.cell_types)

    def _display_name(label):
        # Known labels are looked up in the LUT; purely numeric labels are
        # renumbered relative to the number of atlas types; anything else
        # is passed through unchanged.
        if label in cell_type_names:
            return cell_type_names[label]
        if label.isdigit():
            return 'NewClass_' + "{0:0=2d}".format(int(label) - n_fixed + 1)
        return label

    # Add the new membership to the metadata.
    new_metadata.loc[idx, 'new_class'] = sa.membership
    new_metadata['new_class_renamed'] = [
        _display_name(label) for label in new_metadata['new_class']
    ]

    # Unweighted PCA on the feature-selected matrix (atlas types + new
    # samples as columns).
    cols = list(sa.cell_types) + list(idx)
    feature_selected_matrix = pd.DataFrame(index=sa.features_selected,
                                           columns=cols,
                                           data=sa.matrix)
    normal_PCA, distance_matrix = unweighted_PCA(feature_selected_matrix,
                                                 n_pcs)

    # t-SNE embedding, then attach the display names: new samples first,
    # then the atlas rows.
    tsnedf = perform_tSNE(normal_PCA, 20)
    tsnedf.rename(index=str, columns={0: 'Dim1', 1: 'Dim2'}, inplace=True)
    tsnedf.loc[idx, 'new_membership'] = new_metadata.loc[idx,
                                                         'new_class_renamed']
    atlas_rows = tsnedf[:n_fixed].index
    tsnedf.loc[atlas_rows,
               'new_membership'] = tsnedf.index[:n_fixed].map(cell_type_names)

    # Write the parameters to JSON in a folder named after the timestamp.
    # exist_ok=True replaces the racy exists()/makedirs()/EEXIST dance.
    output_file = (savedir + date + '/annotation_parameters_' + atlas +
                   '_CellAtlasAverages_' + date + '.json')
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as handle:
        json.dump(kwargs, handle)

    # Save the feature matrix for later reference, e.g. making dotplots.
    feature_selected_matrix.to_csv(savedir + date +
                                   '/feature_selected_matrix_' + date + '.csv')

    # Sorted atlas names come first, then sorted names not in the atlas;
    # each display name gets a 1-based integer code.
    atlastypes = list(np.sort(tsnedf.loc[atlas_rows, 'new_membership']))
    newtypes = list(
        set(new_metadata['new_class_renamed']).difference(atlastypes))
    celltypes = atlastypes + list(np.sort(newtypes))
    class_lut = {name: code for code, name in enumerate(celltypes, start=1)}
    tsnedf['class'] = tsnedf['new_membership'].map(class_lut)

    return tsnedf, celltypes, distance_matrix