示例#1
0
文件: promoters.py 项目: BotMukola/_5
def embed(file='data/mml.txt'):
	"""Embed the strings stored in ``file`` into two dimensions.

	Every line of ``file`` becomes one string, with trailing whitespace
	removed. The strings are wrapped in StringCharFeatures over the DNA
	alphabet, a KernelDistance based on a WeightedDegreeStringKernel is
	initialised on them, and MultidimensionalScaling (target dimension 2)
	embeds that distance.

	Returns a tuple ``(feature_matrix, strings)``: the feature matrix of
	the embedding and the list of strings that were read.
	"""
	with open(file) as handle:
		strings = [line.rstrip() for line in handle]

	# NOTE(review): 10 is presumably the kernel degree and 1.0 the distance
	# width -- confirm against the Shogun API.
	dna_features = StringCharFeatures(strings, DNA)
	wd_kernel = WeightedDegreeStringKernel(10)
	kernel_distance = KernelDistance(1.0, wd_kernel)
	kernel_distance.init(dna_features, dna_features)

	mds = MultidimensionalScaling()
	mds.set_target_dim(2)
	embedding = mds.embed_distance(kernel_distance)
	return embedding.get_feature_matrix(), strings
示例#2
0
def converter_multidimensionalscaling_modular(data_fname):
    """Compare Euclidean distances before and after a 2-D MDS embedding.

    Real-valued features are loaded from the CSV file ``data_fname`` and
    embedded into two dimensions with MultidimensionalScaling (landmark
    mode switched off). The Euclidean distance matrices of the original
    features and of the embedding are then compared.

    Returns the result of ``norm(after - before) / norm(before) < 1e-6``.
    If an ImportError is raised, a message is printed and the function
    returns None.
    """
    try:
        import numpy
        from modshogun import RealFeatures, MultidimensionalScaling, EuclideanDistance, CSVFile

        feats = RealFeatures(CSVFile(data_fname))

        dist_before = EuclideanDistance()
        dist_before.init(feats, feats)

        mds = MultidimensionalScaling()
        mds.set_target_dim(2)
        mds.set_landmark(False)
        embedded = mds.apply(feats)

        dist_after = EuclideanDistance()
        dist_after.init(embedded, embedded)

        matrix_after = dist_after.get_distance_matrix()
        matrix_before = dist_before.get_distance_matrix()

        # Relative change of the distance matrix caused by the embedding.
        deviation = numpy.linalg.norm(matrix_after - matrix_before)
        scale = numpy.linalg.norm(matrix_before)
        return deviation / scale < 1e-6
    except ImportError:
        print('No Eigen3 available')
示例#3
0
def embed(file='mml.pickle', N=500):
	"""Embed N randomly chosen positive examples from ``file`` into 2-D.

	``load(file)`` must return a mapping with an ``'examples'`` entry and a
	parallel ``'labels'`` entry. N examples whose label is greater than zero
	are drawn at random without replacement, wrapped in StringCharFeatures
	over the DNA alphabet, and embedded with MultidimensionalScaling (target
	dimension 2) using a KernelDistance built on a
	WeightedDegreeStringKernel.

	Returns a tuple ``(feature_matrix, strings)``: the feature matrix of
	the embedding and the list of selected example strings.

	Raises ValueError (from ``random.sample``) if N is larger than the
	number of positive examples.
	"""
	# print(...) with one pre-formatted argument behaves the same on
	# Python 2 and Python 3.
	print('%s reading %s' % (datetime.datetime.now(), file))
	file_contents = load(file)
	examples = file_contents['examples']
	print('%s there are %d strings in %s' % (datetime.datetime.now(), len(examples), file))

	# Indices of the examples whose label is positive.
	positives = numpy.where(numpy.array(file_contents['labels']) > 0)[0]
	# random.sample() rejects numpy arrays on Python 3 ("Population must be
	# a sequence"), so hand it a plain list of ints.
	selected_idxs = random.sample(positives.tolist(), N)
	strings = [examples[i] for i in selected_idxs]

	# NOTE(review): 10 is presumably the kernel degree and 1.0 the distance
	# width -- confirm against the Shogun API.
	features = StringCharFeatures(strings, DNA)
	kernel = WeightedDegreeStringKernel(10)
	distance = KernelDistance(1.0, kernel)
	distance.init(features, features)
	converter = MultidimensionalScaling()
	converter.set_target_dim(2)
	return converter.embed_distance(distance).get_feature_matrix(), strings
示例#4
0
文件: promoters.py 项目: genba/tapkee
def embed(file='mml.pickle'):
	"""Embed the first 1000 positive examples from ``file`` into 2-D.

	``load(file)`` must return a mapping with an ``'examples'`` entry and a
	parallel ``'labels'`` entry. Examples whose label is greater than 0.0
	are collected in order until 1000 have been gathered, wrapped in
	StringCharFeatures over the DNA alphabet, and embedded with
	MultidimensionalScaling (target dimension 2) using a KernelDistance
	built on a WeightedDegreeStringKernel.

	Returns a tuple ``(feature_matrix, strings)``: the feature matrix of
	the embedding and the list of selected example strings.
	"""
	# print(...) with one pre-formatted argument behaves the same on
	# Python 2 and Python 3.
	print('%s reading %s' % (datetime.datetime.now(), file))
	file_contents = load(file)
	examples = file_contents['examples']
	print('%s there are %d strings in %s' % (datetime.datetime.now(), len(examples), file))

	strings = []
	for i, label in enumerate(file_contents['labels']):
		# Nothing more can be added once the cap is reached, so stop early.
		if len(strings) == 1000:
			break
		if label > 0.0:
			strings.append(examples[i])

	# NOTE(review): 10 is presumably the kernel degree and 1.0 the distance
	# width -- confirm against the Shogun API.
	features = StringCharFeatures(strings, DNA)
	kernel = WeightedDegreeStringKernel(10)
	distance = KernelDistance(1.0, kernel)
	distance.init(features, features)
	converter = MultidimensionalScaling()
	converter.set_target_dim(2)
	return converter.embed_distance(distance).get_feature_matrix(), strings
示例#5
0
文件: promoters.py 项目: genba/tapkee
def embed(file='mml.pickle'):
    """Embed the first 1000 positive examples from ``file`` into 2-D.

    ``load(file)`` must return a mapping with an ``'examples'`` entry and a
    parallel ``'labels'`` entry. Examples whose label is greater than 0.0
    are collected in order until 1000 have been gathered, wrapped in
    StringCharFeatures over the DNA alphabet, and embedded with
    MultidimensionalScaling (target dimension 2) using a KernelDistance
    built on a WeightedDegreeStringKernel.

    Returns a tuple ``(feature_matrix, strings)``: the feature matrix of
    the embedding and the list of selected example strings.
    """
    max_strings = 1000

    # print(...) with one pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print('%s reading %s' % (datetime.datetime.now(), file))
    file_contents = load(file)
    examples = file_contents['examples']
    print('%s there are %d strings in %s' % (
        datetime.datetime.now(), len(examples), file))

    strings = []
    for i, label in enumerate(file_contents['labels']):
        # Nothing more can be added once the cap is reached, so stop early.
        if len(strings) >= max_strings:
            break
        if label > 0.0:
            strings.append(examples[i])

    # NOTE(review): 10 is presumably the kernel degree and 1.0 the distance
    # width -- confirm against the Shogun API.
    features = StringCharFeatures(strings, DNA)
    kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, kernel)
    distance.init(features, features)
    converter = MultidimensionalScaling()
    converter.set_target_dim(2)
    return converter.embed_distance(distance).get_feature_matrix(), strings
def converter_multidimensionalscaling_modular(data_fname):
	"""Compare Euclidean distances before and after a 2-D MDS embedding.

	Real-valued features are read from the CSV file ``data_fname`` and
	embedded into two dimensions with MultidimensionalScaling (landmark
	mode switched off). The Euclidean distance matrix of the embedding is
	compared with that of the original features.

	Returns the result of ``norm(after - before) / norm(before) < 1e-6``.
	If an ImportError is raised, a message is printed and the function
	returns None.
	"""
	try:
		import numpy
		from modshogun import RealFeatures, MultidimensionalScaling, EuclideanDistance, CSVFile

		original = RealFeatures(CSVFile(data_fname))

		original_distance = EuclideanDistance()
		original_distance.init(original, original)

		scaler = MultidimensionalScaling()
		scaler.set_target_dim(2)
		scaler.set_landmark(False)
		embedded = scaler.apply(original)

		embedded_distance = EuclideanDistance()
		embedded_distance.init(embedded, embedded)

		after = embedded_distance.get_distance_matrix()
		before = original_distance.get_distance_matrix()

		# Relative change of the distance matrix caused by the embedding.
		relative_error = numpy.linalg.norm(after - before) / numpy.linalg.norm(before)
		return relative_error < 1e-6
	except ImportError:
		print('No Eigen3 available')