def _transform_cell_feats((cache_dir, images, normalization_name, output_filename, key, header)):
    try:
        import numpy as np
        from cpa.util.cache import Cache, normalizations
        cache = Cache(cache_dir)
        normalization = normalizations[normalization_name]
        normalizeddata, normalized_colnames = cache.load(images, normalization=normalization)
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan
        # Drop cells with any NaN feature value.
        normalizeddata = normalizeddata[~np.isnan(np.sum(normalizeddata, 1)), :]
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan
        # Save the per-cell features to a CSV file named after the group key.
        import csv
        filename = output_filename + "-" + "-".join(key) + ".csv"
        f = open(filename, 'w')
        try:
            w = csv.writer(f)
            w.writerow(header)
            for vector in normalizeddata:
                w.writerow(tuple(key) + tuple(vector))
        finally:
            f.close()
    except: # catch *all* exceptions
        from traceback import print_exc
        import sys
        print_exc(None, sys.stderr)
        return None
def _compute_ksstatistic((cache_dir, images, control_images)):
    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from cpa.util.ks_2samp import ks_2samp

    cache = Cache(cache_dir)
    normalizeddata, variables = cache.load(images, normalization=RobustLinearNormalization)
    control_data, control_colnames = cache.load(control_images, normalization=RobustLinearNormalization)
    assert len(control_data) >= len(normalizeddata)
    assert variables == control_colnames
    # Signed KS statistic of each feature against the control distribution.
    m = len(variables)
    profile = np.empty(m)
    for j in range(m):
        profile[j] = ks_2samp(control_data[:, j], normalizeddata[:, j], signed=True)[0]
    return profile
def _compute_svmnormalvector((cache_dir, images, control_images, rfe)):
    import numpy as np
    from cpa.util.cache import Cache, RobustLinearNormalization
    from sklearn.svm import LinearSVC
    from cpa.util.profile_svmnormalvector import _compute_rfe

    cache = Cache(cache_dir)
    normalizeddata, normalized_colnames = cache.load(images, normalization=RobustLinearNormalization)
    control_data, control_colnames = cache.load(control_images, normalization=RobustLinearNormalization)
    assert len(control_data) >= len(normalizeddata)
    # Downsample the controls (with replacement) to match the treated sample size.
    downsampled = control_data[np.random.randint(0, len(control_data), len(normalizeddata)), :]
    x = np.vstack((normalizeddata, downsampled))
    y = np.array([1] * len(normalizeddata) + [0] * len(downsampled))
    # Fit a linear SVM separating treated (1) from control (0) cells; the
    # normal vector of the separating hyperplane is the profile.
    clf = LinearSVC(C=1.0)
    m = clf.fit(x, y)
    normal_vector = m.coef_[0]
    if rfe:
        # Zero out features not selected by recursive feature elimination.
        normal_vector[~_compute_rfe(x, y)] = 0
    return normal_vector
def _compute_group_mean((cache_dir, images, normalization_name)):
    try:
        import numpy as np
        from cpa.util.cache import Cache, normalizations
        cache = Cache(cache_dir)
        normalization = normalizations[normalization_name]
        normalizeddata, normalized_colnames = cache.load(images, normalization=normalization)
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan
        # Drop cells with any NaN feature value before averaging.
        normalizeddata = normalizeddata[~np.isnan(np.sum(normalizeddata, 1)), :]
        if len(normalizeddata) == 0:
            return np.empty(len(normalized_colnames)) * np.nan
        return np.mean(normalizeddata, axis=0)
    except: # catch *all* exceptions
        from traceback import print_exc
        import sys
        print_exc(None, sys.stderr)
        return None
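
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the single-tuple argument style of the
# workers above is what a parallel map (e.g. multiprocessing.Pool.map) passes
# to each call.  This minimal driver shows how per-group mean profiles might
# be dispatched; the helper name `make_group_mean_profiles`, the `groups`
# dict (group key -> list of image keys), and the use of multiprocessing.Pool
# are assumptions for illustration, not part of the original module.
def make_group_mean_profiles(cache_dir, groups, normalization_name):
    from multiprocessing import Pool
    keys = sorted(groups.keys())
    params = [(cache_dir, groups[k], normalization_name) for k in keys]
    pool = Pool()
    try:
        profiles = pool.map(_compute_group_mean, params)
    finally:
        pool.close()
        pool.join()
    # Map each group key to its mean feature vector (or NaNs / None on failure).
    return dict(zip(keys, profiles))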