def hierarchical(train, merges=3):
    """Train a hierarchical clustering on ``train`` with a Euclidean distance.

    ``RealFeatures``, ``EuclidianDistance`` and ``Hierarchical`` are not
    imported inside this function; they must already be available at
    module level.

    Args:
        train: Raw training data, passed unchanged to ``RealFeatures``.
        merges: First argument handed to ``Hierarchical`` (default 3).

    Returns:
        A ``(model, merge_distances, cluster_pairs)`` tuple: the trained
        ``Hierarchical`` object followed by the results of its
        ``get_merge_distances()`` and ``get_cluster_pairs()`` calls.
    """
    features = RealFeatures(train)
    # The same feature object is used for both sides of the distance.
    metric = EuclidianDistance(features, features)

    model = Hierarchical(merges, metric)
    model.train()

    merge_distances = model.get_merge_distances()
    cluster_pairs = model.get_cluster_pairs()
    return model, merge_distances, cluster_pairs
# Example #2
# 0
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Hierarchically cluster ``fm_train`` using Shogun's modular interface.

    Args:
        fm_train: Training data wrapped in ``RealFeatures``; defaults to
            the module-level ``traindat``.
        merges: First argument handed to ``Hierarchical`` (default 3).

    Returns:
        A 3-tuple of the trained ``Hierarchical`` object, the result of
        ``get_merge_distances()`` and the result of ``get_cluster_pairs()``.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    train_features = RealFeatures(fm_train)
    # Distance between the training features and themselves.
    euclidean = EuclidianDistance(train_features, train_features)

    clusterer = Hierarchical(merges, euclidean)
    clusterer.train()

    # Tuple elements are evaluated left to right, so the merge distances
    # are fetched before the cluster pairs.
    return (
        clusterer,
        clusterer.get_merge_distances(),
        clusterer.get_cluster_pairs(),
    )
def hierarchical():
    """Print a banner and run hierarchical clustering on ``fm_train``.

    ``fm_train`` is not defined in this function and must exist at module
    level. The clustering uses a Euclidean distance and 3 merges.

    Returns:
        None. The merge distances and cluster pairs are queried but their
        values are discarded.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print('Hierarchical')

    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    merges = 3
    feats_train = RealFeatures(fm_train)
    distance = EuclidianDistance(feats_train, feats_train)

    hierarchical = Hierarchical(merges, distance)
    hierarchical.train()

    # Results are intentionally not captured, as in the original example.
    hierarchical.get_merge_distances()
    hierarchical.get_cluster_pairs()
# Example #4
# 0
def perform_clustering(mss_id):
    """Hierarchically cluster the sequences of a multi-split set.

    The split sets of ``expenv.MultiSplitSet.get(mss_id)`` are mapped to
    sequences via ``SequencesHandler.get_seq`` on each split set's
    ``dataset.organism``. The sequences are turned into word features,
    run through the ``SortWordString`` preprocessor, and clustered with
    ``Hierarchical`` (4 merges) over a ``HammingWordDistance``.

    Args:
        mss_id: Identifier passed to ``expenv.MultiSplitSet.get``.

    Returns:
        The trained ``Hierarchical`` object.
    """
    # NOTE: numpy and EuclidianDistance are imported but never used below.
    import numpy
    import expenv

    multi_split_set = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import EuclidianDistance, HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    # Settings for the char -> word feature conversion.
    word_order = 1
    gap_size = 0
    use_reverse = False

    handler = SequencesHandler()
    sequences = [
        handler.get_seq(split_set.dataset.organism)
        for split_set in multi_split_set.split_sets
    ]

    # Character features over the PROTEIN alphabet.
    char_features = StringCharFeatures(PROTEIN)
    char_features.set_features(sequences)

    # Word features derived from the character features.
    word_features = StringWordFeatures(char_features.get_alphabet())
    word_features.obtain_from_char(
        char_features, word_order - 1, word_order, gap_size, use_reverse
    )

    sorter = SortWordString()
    sorter.init(word_features)
    word_features.add_preproc(sorter)
    word_features.apply_preproc()

    sign_flag = False
    metric = HammingWordDistance(word_features, word_features, sign_flag)

    merge_count = 4
    clusterer = Hierarchical(merge_count, metric)
    clusterer.train()

    # Both results are queried and discarded; only the model is returned.
    clusterer.get_merge_distances()
    clusterer.get_cluster_pairs()

    return clusterer
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Run Shogun hierarchical clustering over ``fm_train``.

    Args:
        fm_train: Data handed to ``RealFeatures``; the default is the
            module-level ``traindat``.
        merges: First argument given to ``Hierarchical`` (default 3).

    Returns:
        ``(model, merge_distances, cluster_pairs)``, where ``model`` is
        the trained ``Hierarchical`` object and the other two entries are
        the results of ``get_merge_distances()`` and
        ``get_cluster_pairs()``.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    real_feats = RealFeatures(fm_train)
    # Left- and right-hand side of the distance are the same features.
    pairwise = EuclidianDistance(real_feats, real_feats)

    model = Hierarchical(merges, pairwise)
    model.train()

    merge_distances = model.get_merge_distances()
    cluster_pairs = model.get_cluster_pairs()
    result = (model, merge_distances, cluster_pairs)
    return result
# Example #6
# 0
def perform_clustering(mss_id):
    """Build word features for a multi-split set and cluster them.

    Steps, in order:
      1. Load the ``expenv.MultiSplitSet`` identified by ``mss_id``.
      2. Collect one sequence per split set through
         ``SequencesHandler.get_seq(ss.dataset.organism)``.
      3. Convert the sequences to ``StringWordFeatures`` and apply the
         ``SortWordString`` preprocessor.
      4. Train ``Hierarchical`` with 4 merges on a ``HammingWordDistance``.

    Args:
        mss_id: Identifier passed to ``expenv.MultiSplitSet.get``.

    Returns:
        The trained ``Hierarchical`` object.
    """
    # NOTE: numpy and EuclidianDistance are imported here but not used.
    import numpy
    import expenv

    mss = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import EuclidianDistance, HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    seq_source = SequencesHandler()

    # One sequence per split set, keyed by the dataset's organism.
    organism_seqs = []
    for entry in mss.split_sets:
        organism_seqs.append(seq_source.get_seq(entry.dataset.organism))

    raw_chars = StringCharFeatures(PROTEIN)
    raw_chars.set_features(organism_seqs)

    # Parameters of the char -> word conversion.
    order, gap, reverse = 1, 0, False
    words = StringWordFeatures(raw_chars.get_alphabet())
    words.obtain_from_char(raw_chars, order - 1, order, gap, reverse)

    word_sorter = SortWordString()
    word_sorter.init(words)
    words.add_preproc(word_sorter)
    words.apply_preproc()

    use_sign = False
    hamming = HammingWordDistance(words, words, use_sign)

    num_merges = 4
    tree = Hierarchical(num_merges, hamming)
    tree.train()

    # The results of these two calls are not kept.
    tree.get_merge_distances()
    tree.get_cluster_pairs()

    return tree