def clusterize_by_rt(self, error_rt):
    """
    PUBLIC function
    Cluster ``self.peakels`` by retention time with the strategy selected
    by ``self.rt_method``:

    * 1 -- home-made basic clustering (``clusterize_basic``)
    * 2 -- hierarchical clustering on the pairwise rt distances
    * 3 -- DBSCAN clustering on the rt values

    :param error_rt: retention time tolerance; it must be a ``float`` when
        the basic strategy (1) is selected
    :return: list of clusters (as set, per the original documentation)
    :raises TypeError: strategy 1 is selected and ``error_rt`` is not a float
    :raises ValueError: ``self.rt_method`` is not 1, 2 or 3
    """
    method = self.rt_method

    if method == 1:
        logging.info("Basic clustering with rt_error:{0}".format(error_rt))
        if isinstance(error_rt, float):
            return clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
        raise TypeError("[clusterize]: args[0] is not a float")

    if method == 2:
        # One single-value row per peakel, so pdist compares the rt values.
        rt_column = np.array([[peakel.rt] for peakel in self.peakels])
        # pdist uses the euclidean metric by default
        condensed_dist = sp.spatial.distance.pdist(rt_column)
        grouped = clusterize_hierarchical(self.peakels, condensed_dist, "", error_rt)
        return list(grouped.values())

    if method == 3:
        logging.info("DB SCAN clustering with error_rt:{0}".format(error_rt))
        rt_column = [[peakel.rt] for peakel in self.peakels]
        # NOTE(review): eps is fixed at 0.35, error_rt is only logged in this
        # branch. Alternative that was considered: eps=error_rt / 2.0,
        # min_samples=1 -- confirm which one is intended.
        return clusterize_dbscan(rt_column, self.peakels, eps=0.35)

    raise ValueError("wrong clustering technique !")
def clusterize_by_rt(self, error_rt):
    """
    PUBLIC function
    Cluster ``self.peakels`` by retention time with the strategy selected
    by ``self.rt_method``:

    * 1 -- home-made basic clustering (``clusterize_basic``)
    * 2 -- hierarchical clustering on the pairwise rt distances
    * 3 -- DBSCAN clustering on the rt values

    :param error_rt: retention time tolerance; it must be a ``float`` when
        the basic strategy (1) is selected
    :return: list of clusters (as set, per the original documentation)
    :raises TypeError: strategy 1 is selected and ``error_rt`` is not a float
    :raises ValueError: ``self.rt_method`` is not 1, 2 or 3
    """
    method = self.rt_method

    if method == 1:
        logging.info("Basic clustering with rt_error:{0}".format(error_rt))
        if isinstance(error_rt, float):
            return clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
        raise TypeError("[clusterize]: args[0] is not a float")

    if method == 2:
        # One single-value row per peakel, so pdist compares the rt values.
        rt_column = np.array([[peakel.rt] for peakel in self.peakels])
        # pdist uses the euclidean metric by default
        condensed_dist = sp.spatial.distance.pdist(rt_column)
        grouped = clusterize_hierarchical(self.peakels, condensed_dist, "", error_rt)
        return list(grouped.values())

    if method == 3:
        logging.info('DB SCAN clustering with error_rt:{0}'.format(error_rt))
        rt_column = [[peakel.rt] for peakel in self.peakels]
        # NOTE(review): eps is fixed at 0.35, error_rt is only logged in this
        # branch. Alternative that was considered: eps=error_rt / 2.0,
        # min_samples=1 -- confirm which one is intended.
        return clusterize_dbscan(rt_column, self.peakels, eps=0.35)

    raise ValueError("wrong clustering technique !")
def _check_update_corr_shape_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_SHAPE_CORR):
    """
    PRIVATE function
    Sub-cluster the members of one rt cluster on peak shape, then pass the
    result to ``self._split_rt_cluster``.

    ``self.corr_shape_method`` selects the strategy: 1 for the basic
    clustering, 2 for hierarchical clustering on the correlation distance
    between the peak intensity profiles.

    :param rt_cluster: members of one retention time cluster
    :param distance_corr: correlation threshold forwarded to the clustering
    :return: whatever ``self._split_rt_cluster`` returns
    """
    method = self.corr_shape_method
    # NOTE(review): stays None when the method is neither 1 nor 2 and is
    # still handed to _split_rt_cluster -- confirm this is intended.
    sub_clusters = None

    if method == 1:
        sub_clusters = clusterize_basic(
            rt_cluster, self.BASIC_CORR_SHAPE_CALLABLE, distance_corr
        )
    elif method == 2:
        profiles = []
        for member in rt_cluster:
            if len(member.peaks):
                profiles.append([peak.intensity for peak in member.peaks])
            else:
                # a member without peaks contributes a single zero intensity
                profiles.append([0])
        shape_dist = sp.spatial.distance.pdist(np.array(profiles), metric="correlation")
        sub_clusters = clusterize_hierarchical(
            rt_cluster, shape_dist, distance_corr, clip=True
        )

    return self._split_rt_cluster(sub_clusters)
def _check_update_corr_shape_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_SHAPE_CORR):
    """
    PRIVATE function
    Sub-cluster the members of one rt cluster on peak shape, then pass the
    result to ``self._split_rt_cluster``.

    ``self.corr_shape_method`` selects the strategy: 1 for the basic
    clustering, 2 for hierarchical clustering on the correlation distance
    between the peak intensity profiles.

    :param rt_cluster: members of one retention time cluster
    :param distance_corr: correlation threshold forwarded to the clustering
    :return: whatever ``self._split_rt_cluster`` returns
    """
    method = self.corr_shape_method
    # NOTE(review): stays None when the method is neither 1 nor 2 and is
    # still handed to _split_rt_cluster -- confirm this is intended.
    sub_clusters = None

    if method == 1:
        sub_clusters = clusterize_basic(
            rt_cluster, self.BASIC_CORR_SHAPE_CALLABLE, distance_corr
        )
    elif method == 2:
        profiles = []
        for member in rt_cluster:
            if len(member.peaks):
                profiles.append([peak.intensity for peak in member.peaks])
            else:
                # a member without peaks contributes a single zero intensity
                profiles.append([0])
        shape_dist = sp.spatial.distance.pdist(np.array(profiles), metric='correlation')
        sub_clusters = clusterize_hierarchical(
            rt_cluster, shape_dist, distance_corr, clip=True
        )

    return self._split_rt_cluster(sub_clusters)
def _check_update_corr_intensity_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_INT_CORR):
    """
    PRIVATE function
    Sub-cluster the members of one rt cluster on the correlation of their
    per-sample areas.

    ``self.corr_int_method`` selects the strategy: 1 for the basic
    clustering, 2 for hierarchical clustering on the pairwise correlation
    distances. Any other value is rejected.

    :param rt_cluster: members of one retention time cluster
    :param distance_corr: correlation threshold forwarded to the clustering
    :return: an empty list for a single-member cluster, otherwise the
        result of the selected clustering function
    :raises ValueError: ``self.corr_int_method`` is neither 1 nor 2
    """
    # A lone member has nothing to be correlated with.
    if len(rt_cluster) == 1:
        return []

    method = self.corr_int_method

    if method == 1:
        return clusterize_basic(rt_cluster, self.BASIC_CORR_INT_CALLABLE, distance_corr)

    if method == 2:
        areas = np.array(
            [list(member.area_by_sample_name.values()) for member in rt_cluster]
        )
        # pairwise_distances is used here instead of scipy's pdist; the
        # n_jobs=-1 option was left disabled.
        intensity_dist = pairwise_distances(areas, metric="correlation")
        return clusterize_hierarchical(
            rt_cluster, intensity_dist, distance_corr, clip=True
        )

    # The result is returned as is, without going through _split_rt_cluster.
    raise ValueError("dbscan not supported for intensities correlation clustering")
def _check_update_corr_intensity_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_INT_CORR):
    """
    PRIVATE function
    Sub-cluster the members of one rt cluster on the correlation of their
    per-sample areas.

    ``self.corr_int_method`` selects the strategy: 1 for the basic
    clustering, 2 for hierarchical clustering on the pairwise correlation
    distances. Any other value is rejected.

    :param rt_cluster: members of one retention time cluster
    :param distance_corr: correlation threshold forwarded to the clustering
    :return: an empty list for a single-member cluster, otherwise the
        result of the selected clustering function
    :raises ValueError: ``self.corr_int_method`` is neither 1 nor 2
    """
    # A lone member has nothing to be correlated with.
    if len(rt_cluster) == 1:
        return []

    method = self.corr_int_method

    if method == 1:
        return clusterize_basic(rt_cluster, self.BASIC_CORR_INT_CALLABLE, distance_corr)

    if method == 2:
        areas = np.array(
            [list(member.area_by_sample_name.values()) for member in rt_cluster]
        )
        # pairwise_distances is used here instead of scipy's pdist; the
        # n_jobs=-1 option was left disabled.
        intensity_dist = pairwise_distances(areas, metric='correlation')
        return clusterize_hierarchical(
            rt_cluster, intensity_dist, distance_corr, clip=True
        )

    # The result is returned as is, without going through _split_rt_cluster.
    raise ValueError("dbscan not supported for intensities correlation clustering")
def test_clusterize_basic(self):
    # Run the basic clustering on the fixture features with the rt callable
    # and a tolerance of 6.0, then check the number of clusters.
    clusters = clusterize_basic(
        self.features, PeakelClusterer.BASIC_RT_CALLABLE, 6.0
    )
    cluster_count = len(clusters)
    print("len clusters basic: {0}".format(cluster_count))
    # assertGreaterEqual(a, b) checks a >= b: at most 4 clusters may come out.
    self.assertGreaterEqual(4, cluster_count)