def _transform_cell_feats((cache_dir, images, normalization_name, output_filename, key, header)):
    try:
        import numpy as np
        from cpa.util.cache import Cache, normalizations
        cache = Cache(cache_dir)
        normalization = normalizations[normalization_name]
        normalizeddata, normalized_colnames = cache.load(images,
                                                    normalization=normalization)
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan

        normalizeddata = normalizeddata[
                ~np.isnan(np.sum(normalizeddata,1)),:]

        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan

        # save the features to csv
        import csv
        filename = output_filename + "-" + "-".join(key) + ".csv"
        f = open(filename, 'w')
        w = csv.writer(f)
        w.writerow(header)
        for vector in normalizeddata:
            w.writerow(tuple(key) + tuple(vector))
        f.close()

    except: # catch *all* exceptions
        from traceback import print_exc
        import sys
        print_exc(None, sys.stderr)
        return None
def _compute_ksstatistic(args):
    """Compare each variable of a group against the controls with ks_2samp.

    Args:
        args: a single 3-tuple ``(cache_dir, images, control_images)``.
            Both image sets are loaded from ``Cache(cache_dir)`` with
            ``RobustLinearNormalization``.

    Returns:
        A float array with one entry per variable.  Entry ``j`` is element
        ``[0]`` of ``ks_2samp(control_column_j, group_column_j,
        signed=True)``.

    Raises:
        AssertionError: if there are fewer control rows than group rows,
            or if the two loads report different variable names.
    """
    # Unpacked in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113, while this form accepts the
    # same single-tuple call on Python 2 and 3.
    cache_dir, images, control_images = args

    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from cpa.util.ks_2samp import ks_2samp

    cache = Cache(cache_dir)
    normalizeddata, variables = cache.load(
        images, normalization=RobustLinearNormalization)
    control_data, control_colnames = cache.load(
        control_images, normalization=RobustLinearNormalization)
    assert len(control_data) >= len(normalizeddata), \
        "fewer control rows than group rows"
    assert variables == control_colnames, \
        "group and control variables differ"

    m = len(variables)
    profile = np.empty(m)
    for j in range(m):
        # Only the first element of the ks_2samp result is kept.
        profile[j] = ks_2samp(control_data[:, j], normalizeddata[:, j],
                              signed=True)[0]
    return profile
# Example #3
0
def _compute_ksstatistic(args):
    """Compare each variable of a group against the controls with ks_2samp.

    Args:
        args: a single 3-tuple ``(cache_dir, images, control_images)``.
            Both image sets are loaded from ``Cache(cache_dir)`` with
            ``RobustLinearNormalization``.

    Returns:
        A float array with one entry per variable.  Entry ``j`` is element
        ``[0]`` of ``ks_2samp(control_column_j, group_column_j,
        signed=True)``.

    Raises:
        AssertionError: if there are fewer control rows than group rows,
            or if the two loads report different variable names.

    Side effects:
        Prints the shapes of the group data and the control data to
        stdout, separated by a space.
    """
    # Unpacked in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113, while this form accepts the
    # same single-tuple call on Python 2 and 3.
    cache_dir, images, control_images = args

    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from cpa.util.ks_2samp import ks_2samp

    cache = Cache(cache_dir)
    normalizeddata, variables = cache.load(
        images, normalization=RobustLinearNormalization)
    control_data, control_colnames = cache.load(
        control_images, normalization=RobustLinearNormalization)
    # A single pre-formatted string prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (normalizeddata.shape, control_data.shape))
    assert len(control_data) >= len(normalizeddata), \
        "fewer control rows than group rows"
    assert variables == control_colnames, \
        "group and control variables differ"

    m = len(variables)
    profile = np.empty(m)
    for j in range(m):
        # Only the first element of the ks_2samp result is kept.
        profile[j] = ks_2samp(control_data[:, j], normalizeddata[:, j],
                              signed=True)[0]
    return profile
def _compute_svmnormalvector(args):
    """Fit a linear SVM separating a group from controls; return its weights.

    Args:
        args: a single 4-tuple ``(cache_dir, images, control_images, rfe)``.
            Both image sets are loaded from ``Cache(cache_dir)`` with
            ``RobustLinearNormalization``.  When ``rfe`` is true, the
            result of ``_compute_rfe(x, y)`` is used as a boolean mask and
            every coefficient where the mask is false is set to 0.

    Returns:
        ``coef_[0]`` of a ``LinearSVC(C=1.0)`` fitted with label 1 for the
        group rows and label 0 for the sampled control rows.

    Raises:
        AssertionError: if there are fewer control rows than group rows.
    """
    # Unpacked in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113, while this form accepts the
    # same single-tuple call on Python 2 and 3.
    cache_dir, images, control_images, rfe = args

    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from sklearn.svm import LinearSVC
    from cpa.util.profile_svmnormalvector import _compute_rfe

    cache = Cache(cache_dir)
    # The column names returned by the cache are not needed here.
    normalizeddata, _ = cache.load(
        images, normalization=RobustLinearNormalization)
    control_data, _ = cache.load(
        control_images, normalization=RobustLinearNormalization)
    assert len(control_data) >= len(normalizeddata), \
        "fewer control rows than group rows"

    # Draw as many control rows as there are group rows.  The indices come
    # from randint, so the same control row may be drawn more than once.
    picks = np.random.randint(0, len(control_data), len(normalizeddata))
    downsampled = control_data[picks, :]

    x = np.vstack((normalizeddata, downsampled))
    y = np.array([1] * len(normalizeddata) + [0] * len(downsampled))
    normal_vector = LinearSVC(C=1.0).fit(x, y).coef_[0]
    if rfe:
        normal_vector[~_compute_rfe(x, y)] = 0
    return normal_vector
def _compute_svmnormalvector(args):
    """Fit a linear SVM separating a group from controls; return its weights.

    Args:
        args: a single 4-tuple ``(cache_dir, images, control_images, rfe)``.
            Both image sets are loaded from ``Cache(cache_dir)`` with
            ``RobustLinearNormalization``.  When ``rfe`` is true, the
            result of ``_compute_rfe(x, y)`` is used as a boolean mask and
            every coefficient where the mask is false is set to 0.

    Returns:
        ``coef_[0]`` of a ``LinearSVC(C=1.0)`` fitted with label 1 for the
        group rows and label 0 for the sampled control rows.

    Raises:
        AssertionError: if there are fewer control rows than group rows.
    """
    # Unpacked in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113, while this form accepts the
    # same single-tuple call on Python 2 and 3.
    cache_dir, images, control_images, rfe = args

    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from sklearn.svm import LinearSVC
    from cpa.util.profile_svmnormalvector import _compute_rfe

    cache = Cache(cache_dir)
    # The column names returned by the cache are not needed here.
    normalizeddata, _ = cache.load(
        images, normalization=RobustLinearNormalization)
    control_data, _ = cache.load(
        control_images, normalization=RobustLinearNormalization)
    assert len(control_data) >= len(normalizeddata), \
        "fewer control rows than group rows"

    # Draw as many control rows as there are group rows.  The indices come
    # from randint, so the same control row may be drawn more than once.
    picks = np.random.randint(0, len(control_data), len(normalizeddata))
    downsampled = control_data[picks, :]

    x = np.vstack((normalizeddata, downsampled))
    y = np.array([1] * len(normalizeddata) + [0] * len(downsampled))
    normal_vector = LinearSVC(C=1.0).fit(x, y).coef_[0]
    if rfe:
        normal_vector[~_compute_rfe(x, y)] = 0
    return normal_vector
# Example #6
0
def _compute_group_mean((cache_dir, images, normalization_name)):
    try:
        import numpy as np
        from cpa.util.cache import Cache, normalizations
        cache = Cache(cache_dir)
        normalization = normalizations[normalization_name]
        normalizeddata, normalized_colnames = cache.load(images,
                                                    normalization=normalization)
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan

        normalizeddata = normalizeddata[
                ~np.isnan(np.sum(normalizeddata,1)),:]

        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan

        return np.mean(normalizeddata, axis = 0)
    except: # catch *all* exceptions
        from traceback import print_exc
        import sys
        print_exc(None, sys.stderr)
        return None