def hierarchical(train, merges=3):
    """Run Shogun hierarchical clustering on ``train`` with a Euclidean distance.

    ``RealFeatures``, ``EuclidianDistance`` and ``Hierarchical`` are not
    imported here; they must already be available in the enclosing scope.

    Parameters:
        train: data handed unchanged to ``RealFeatures``.
        merges: first argument passed to the ``Hierarchical`` constructor
            (defaults to 3).

    Returns:
        A 3-tuple ``(clusterer, merge_distances, cluster_pairs)`` holding the
        trained ``Hierarchical`` object and the results of its
        ``get_merge_distances()`` and ``get_cluster_pairs()`` calls.
    """
    features = RealFeatures(train)

    # The same feature object is used for both sides of the distance.
    metric = EuclidianDistance(features, features)

    clusterer = Hierarchical(merges, metric)
    clusterer.train()

    merge_distances = clusterer.get_merge_distances()
    cluster_pairs = clusterer.get_cluster_pairs()
    return clusterer, merge_distances, cluster_pairs
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Cluster ``fm_train`` hierarchically using Shogun's modular interface.

    The Shogun classes are imported inside the function, so the module can be
    loaded without them being resolved up front.

    Parameters:
        fm_train: data wrapped in ``RealFeatures``; defaults to the
            module-level ``traindat``.
        merges: first argument passed to the ``Hierarchical`` constructor
            (defaults to 3).

    Returns:
        A 3-tuple ``(clusterer, merge_distances, cluster_pairs)``: the trained
        ``Hierarchical`` object followed by the values returned by its
        ``get_merge_distances()`` and ``get_cluster_pairs()`` methods.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    features = RealFeatures(fm_train)

    # Distance is computed between the training features and themselves.
    clusterer = Hierarchical(merges, EuclidianDistance(features, features))
    clusterer.train()

    merge_distances = clusterer.get_merge_distances()
    cluster_pairs = clusterer.get_cluster_pairs()
    return clusterer, merge_distances, cluster_pairs
def hierarchical():
    """Print a banner and run hierarchical clustering on the global ``fm_train``.

    Reads the module-level name ``fm_train`` (not defined in this function),
    wraps it in ``RealFeatures``, builds a Euclidean distance of the features
    against themselves and trains a ``Hierarchical`` clusterer with 3 merges.

    Returns:
        None. The merge distances and cluster pairs are queried but their
        values are discarded.
    """
    # Parenthesised form prints the same text on Python 2 and Python 3; the
    # bare ``print 'Hierarchical'`` statement is a SyntaxError on Python 3.
    print('Hierarchical')

    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    merges = 3

    feats_train = RealFeatures(fm_train)
    distance = EuclidianDistance(feats_train, feats_train)

    clusterer = Hierarchical(merges, distance)
    clusterer.train()

    # Results are intentionally not returned; the accessor calls are kept so
    # the observable call sequence matches the original.
    # NOTE(review): presumably this is a smoke test of the API - confirm.
    clusterer.get_merge_distances()
    clusterer.get_cluster_pairs()
def perform_clustering(mss_id):
    """Hierarchically cluster the sequences belonging to a multi-split set.

    Looks up the ``expenv.MultiSplitSet`` with id ``mss_id``, fetches one
    sequence per split set (keyed by ``ss.dataset.organism``) through
    ``SequencesHandler.get_seq``, converts the sequences to sorted word
    features and trains a ``Hierarchical`` clusterer with 4 merges on a
    ``HammingWordDistance``.

    Parameters:
        mss_id: identifier passed to ``expenv.MultiSplitSet.get``.

    Returns:
        The trained ``Hierarchical`` object. Merge distances and cluster
        pairs are queried but not returned.
    """
    import numpy
    import expenv

    # The lookup happens before the remaining imports, as in the original.
    split_group = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import EuclidianDistance, HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    # Settings forwarded to obtain_from_char below.
    order = 1
    gap = 0
    reverse = False

    handler = SequencesHandler()
    sequences = []
    for split in split_group.split_sets:
        sequences.append(handler.get_seq(split.dataset.organism))

    char_features = StringCharFeatures(PROTEIN)
    char_features.set_features(sequences)

    word_features = StringWordFeatures(char_features.get_alphabet())
    word_features.obtain_from_char(char_features, order - 1, order, gap, reverse)

    # Initialise the SortWordString preprocessor on the features, attach it
    # and apply it.
    sorter = SortWordString()
    sorter.init(word_features)
    word_features.add_preproc(sorter)
    word_features.apply_preproc()

    # Hamming word distance is the metric in use; EuclidianDistance is
    # imported above but not applied.
    use_sign = False
    metric = HammingWordDistance(word_features, word_features, use_sign)

    merges = 4
    clusterer = Hierarchical(merges, metric)
    clusterer.train()

    # Queried for parity with the original; the values are discarded.
    clusterer.get_merge_distances()
    clusterer.get_cluster_pairs()

    return clusterer
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Train a Shogun ``Hierarchical`` clusterer on real-valued features.

    Parameters:
        fm_train: input handed to ``RealFeatures``; defaults to the
            module-level ``traindat``.
        merges: value passed as the first ``Hierarchical`` constructor
            argument (defaults to 3).

    Returns:
        ``(model, distances, pairs)`` where ``model`` is the trained
        ``Hierarchical`` instance, ``distances`` is the result of
        ``model.get_merge_distances()`` and ``pairs`` is the result of
        ``model.get_cluster_pairs()``.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    train_features = RealFeatures(fm_train)
    # Both distance operands are the same feature object.
    euclidean = EuclidianDistance(train_features, train_features)

    model = Hierarchical(merges, euclidean)
    model.train()

    distances = model.get_merge_distances()
    pairs = model.get_cluster_pairs()

    result = (model, distances, pairs)
    return result
def perform_clustering(mss_id):
    """Build word features from a multi-split set's sequences and cluster them.

    The multi-split set is obtained via ``expenv.MultiSplitSet.get(mss_id)``;
    for each of its ``split_sets`` the sequence for ``dataset.organism`` is
    fetched from a ``SequencesHandler``. The sequences are turned into
    ``StringWordFeatures``, preprocessed with ``SortWordString`` and clustered
    by ``Hierarchical`` (4 merges) over a ``HammingWordDistance``.

    Parameters:
        mss_id: identifier passed to ``expenv.MultiSplitSet.get``.

    Returns:
        The trained ``Hierarchical`` instance.
    """
    import numpy
    import expenv

    # Resolved before the remaining imports, matching the original order.
    multi_split_set = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import EuclidianDistance, HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    # Values passed through to obtain_from_char.
    order = 1
    start = order - 1
    gap = 0
    reverse = False

    sequence_source = SequencesHandler()
    organism_sequences = [
        sequence_source.get_seq(split_set.dataset.organism)
        for split_set in multi_split_set.split_sets
    ]

    protein_chars = StringCharFeatures(PROTEIN)
    protein_chars.set_features(organism_sequences)

    alphabet = protein_chars.get_alphabet()
    words = StringWordFeatures(alphabet)
    words.obtain_from_char(protein_chars, start, order, gap, reverse)

    # Attach and apply the SortWordString preprocessor to the word features.
    word_sorter = SortWordString()
    word_sorter.init(words)
    words.add_preproc(word_sorter)
    words.apply_preproc()

    # The Hamming word distance is used; the imported EuclidianDistance is
    # left unused here.
    use_sign = False
    hamming = HammingWordDistance(words, words, use_sign)

    merges = 4
    model = Hierarchical(merges, hamming)
    model.train()

    # Accessors are invoked as in the original; their results are not kept.
    model.get_merge_distances()
    model.get_cluster_pairs()

    return model