示例#1
0
def _evaluate(indata):
    """Compare the static interface's HMM likelihood with the reference value.

    NOTE(review): in the original, everything after the first ``return`` was
    unreachable and referenced undefined names (``distribution``,
    ``derivatives``); that dead code has been removed.  The early return is
    kept, so runtime behaviour is unchanged.
    """
    prefix = 'distribution_'
    # what is sg('likelihood')?
    likelihood = abs(sg('hmm_likelihood') - indata[prefix + 'likelihood'])
    return util.check_accuracy(indata[prefix + 'accuracy'],
                               likelihood=likelihood)
示例#2
0
def _evaluate(indata):
    """Train the configured clustering method on a distance matrix and
    compare its outputs (radi/centers or merge distances/pairs) against
    the reference values in *indata*."""
    # The clustering constructor takes either a cluster count or a merge count.
    if 'clustering_k' in indata:
        num_arg = indata['clustering_k']
    elif 'clustering_merges' in indata:
        num_arg = indata['clustering_merges']
    else:
        return False

    feats = util.get_features(indata, 'distance_')
    distance_class = eval(indata['distance_name'])
    dist = distance_class(feats['train'], feats['train'])

    clustering_class = eval(indata['clustering_name'])
    clusterer = clustering_class(num_arg, dist)
    clusterer.train()

    accuracy = indata['clustering_accuracy']
    if 'clustering_radi' in indata:
        radi = max(abs(clusterer.get_radiuses() - indata['clustering_radi']))
        centers = max(abs(
            clusterer.get_cluster_centers().flatten() -
            indata['clustering_centers'].flat))
        return util.check_accuracy(accuracy, radi=radi, centers=centers)
    elif 'clustering_merge_distance' in indata:
        merge_distance = max(abs(
            clusterer.get_merge_distances() -
            indata['clustering_merge_distance']))
        pairs = max(abs(
            clusterer.get_cluster_pairs() -
            indata['clustering_pairs']).flat)
        return util.check_accuracy(accuracy,
                                   merge_distance=merge_distance,
                                   pairs=pairs)
    else:
        return util.check_accuracy(accuracy)
示例#3
0
def _evaluate (indata):
	"""Compare the static interface's HMM likelihood with the reference value.

	NOTE(review): in the original, everything after the first ``return`` was
	unreachable and referenced undefined names (``distribution``,
	``derivatives``); that dead code has been removed.  The early return is
	kept, so runtime behaviour is unchanged.
	"""
	prefix='distribution_'
	# what is sg('likelihood')?
	likelihood=abs(sg('hmm_likelihood')-indata[prefix+'likelihood'])
	return util.check_accuracy(indata[prefix+'accuracy'],
		likelihood=likelihood)
示例#4
0
def _evaluate (indata):
	"""Train the configured clustering method and check its results
	(radi/centers or merge distances/pairs) against the references."""
	# Constructor argument is either a cluster count or a merge count.
	if 'clustering_k' in indata:
		ctor_arg = indata['clustering_k']
	elif 'clustering_merges' in indata:
		ctor_arg = indata['clustering_merges']
	else:
		return False

	feats = util.get_features(indata, 'distance_')
	distance = eval(indata['distance_name'])(feats['train'], feats['train'])

	clusterer = eval(indata['clustering_name'])(ctor_arg, distance)
	clusterer.train()

	accuracy = indata['clustering_accuracy']
	if 'clustering_radi' in indata:
		radi = max(abs(clusterer.get_radiuses() - indata['clustering_radi']))
		centers = max(abs(
			clusterer.get_cluster_centers().flatten() -
			indata['clustering_centers'].flat))
		return util.check_accuracy(accuracy, radi=radi, centers=centers)
	if 'clustering_merge_distance' in indata:
		merge_distance = max(abs(
			clusterer.get_merge_distances() -
			indata['clustering_merge_distance']))
		pairs = max(abs(
			clusterer.get_cluster_pairs() -
			indata['clustering_pairs']).flat)
		return util.check_accuracy(accuracy,
			merge_distance=merge_distance, pairs=pairs)
	return util.check_accuracy(accuracy)
示例#5
0
def _evaluate(indata):
    """Train the distribution named in *indata* and compare its
    log-likelihood, summed log-derivatives and (for HMMs) best-path values
    against the reference data.

    Returns the result of ``util.check_accuracy``.
    """
    prefix = 'distribution_'
    feats = util.get_features(indata, prefix)

    if indata[prefix + 'name'] == 'HMM':
        distribution = HMM(feats['train'], indata[prefix + 'N'],
                           indata[prefix + 'M'], indata[prefix + 'pseudo'])
        distribution.train()
        distribution.baum_welch_viterbi_train(BW_NORMAL)
    else:
        dfun = eval(indata[prefix + 'name'])
        distribution = dfun(feats['train'])
        distribution.train()

    likelihood = distribution.get_log_likelihood_sample()
    num_examples = feats['train'].get_num_vectors()
    num_param = distribution.get_num_model_parameters()
    derivatives = 0
    for i in xrange(num_param):
        for j in xrange(num_examples):
            val = distribution.get_log_derivative(i, j)
            # BUGFIX: 'val != nan' was always True (NaN compares unequal to
            # everything, including itself), so NaN values poisoned the sum.
            # 'val == val' is False exactly for NaN.
            if val != -inf and val == val:  # only consider sparse matrix!
                derivatives += val

    derivatives = abs(derivatives - indata[prefix + 'derivatives'])
    likelihood = abs(likelihood - indata[prefix + 'likelihood'])

    if indata[prefix + 'name'] == 'HMM':
        best_path = 0
        best_path_state = 0
        for i in xrange(indata[prefix + 'num_examples']):
            best_path += distribution.best_path(i)
            for j in xrange(indata[prefix + 'N']):
                best_path_state += distribution.get_best_path_state(i, j)

        best_path = abs(best_path - indata[prefix + 'best_path'])
        best_path_state = abs(best_path_state -
                              indata[prefix + 'best_path_state'])

        return util.check_accuracy(indata[prefix + 'accuracy'],
                                   derivatives=derivatives,
                                   likelihood=likelihood,
                                   best_path=best_path,
                                   best_path_state=best_path_state)
    else:
        return util.check_accuracy(indata[prefix + 'accuracy'],
                                   derivatives=derivatives,
                                   likelihood=likelihood)
示例#6
0
def _evaluate_custom(indata, prefix):
    """Exercise CustomKernel's three matrix setters and compare each
    resulting kernel matrix against its reference matrix."""
    data = indata[prefix + 'data']
    feats = {'train': RealFeatures(data), 'test': RealFeatures(data)}

    symdata = indata[prefix + 'symdata']
    # Collect the lower triangle (y <= x) of the symmetric reference matrix.
    lowertriangle = array([symdata[(x, y)]
                           for x in xrange(symdata.shape[1])
                           for y in xrange(symdata.shape[0])
                           if y <= x])

    kernel = CustomKernel()

    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    diff = abs(indata[prefix + 'matrix_triangletriangle'] -
               kernel.get_kernel_matrix())
    triangletriangle = max(diff.flat)

    kernel.set_triangle_kernel_matrix_from_full(symdata)
    diff = abs(indata[prefix + 'matrix_fulltriangle'] -
               kernel.get_kernel_matrix())
    fulltriangle = max(diff.flat)

    kernel.set_full_kernel_matrix_from_full(data)
    diff = abs(indata[prefix + 'matrix_fullfull'] - kernel.get_kernel_matrix())
    fullfull = max(diff.flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               triangletriangle=triangletriangle,
                               fulltriangle=fulltriangle,
                               fullfull=fullfull)
示例#7
0
def _evaluate_custom (indata, prefix):
	raise NotImplementedError, 'Custom kernel not yet implemented in static interfaces.'

	symdata=indata[prefix+'symdata']
	lowertriangle=array([symdata[(x,y)] for x in xrange(symdata.shape[1])
		for y in xrange(symdata.shape[0]) if y<=x])

	sg('set_kernel', 'CUSTOM')
	sg('set_triangle_kernel_matrix_from_triangle', lowertriangle)
	triangletriangle=max(abs(
		indata[prefix+'matrix_triangletriangle']-sg('get_kernel_matrix')).flat)

	sg('set_triangle_kernel_matrix_from_full', indata[prefix+'symdata'])
	fulltriangle=max(abs(
		indata[prefix+'matrix_fulltriangle']-sg('get_kernel_matrix')).flat)

	sg('set_full_kernel_matrix_from_full', indata[prefix+'data'])
	fullfull=max(abs(
		indata[prefix+'matrix_fullfull']-sg('get_kernel_matrix')).flat)

	return util.check_accuracy(
		indata[prefix+'accuracy'],
		triangletriangle=triangletriangle,
		fulltriangle=fulltriangle,
		fullfull=fullfull
	)
示例#8
0
def _evaluate(indata):
    """Train a kernel-based regression and compare its bias, alpha sum,
    support-vector index sum and classification output against the
    reference data.

    Returns False when the requested regression class is unavailable in
    this build or the regression type is unknown.
    """
    prefix = 'kernel_'
    feats = util.get_features(indata, prefix)
    kargs = util.get_args(indata, prefix)
    fun = eval(indata[prefix + 'name'] + 'Kernel')
    kernel = fun(feats['train'], feats['train'], *kargs)

    prefix = 'regression_'
    kernel.parallel.set_num_threads(indata[prefix + 'num_threads'])

    try:
        name = indata[prefix + 'name']
        # Reference files still use the all-caps legacy class name.
        if name == 'KERNELRIDGEREGRESSION':
            name = 'KernelRidgeRegression'

        rfun = eval(name)
    except NameError:
        print("%s is disabled/unavailable!" % indata[prefix + 'name'])
        return False

    labels = RegressionLabels(double(indata[prefix + 'labels']))
    if indata[prefix + 'type'] == 'svm':
        regression = rfun(indata[prefix + 'C'], indata[prefix + 'epsilon'],
                          kernel, labels)
    elif indata[prefix + 'type'] == 'kernelmachine':
        regression = rfun(indata[prefix + 'tau'], kernel, labels)
    else:
        return False

    regression.parallel.set_num_threads(indata[prefix + 'num_threads'])
    if prefix + 'tube_epsilon' in indata:
        regression.set_tube_epsilon(indata[prefix + 'tube_epsilon'])

    regression.train()

    alphas = 0
    bias = 0
    sv = 0
    if prefix + 'bias' in indata:
        bias = abs(regression.get_bias() - indata[prefix + 'bias'])
    if prefix + 'alphas' in indata:
        for item in regression.get_alphas().tolist():
            alphas += item
        alphas = abs(alphas - indata[prefix + 'alphas'])
    if prefix + 'support_vectors' in indata:
        # BUGFIX: was 'inregression' — a NameError at runtime.
        for item in regression.get_support_vectors().tolist():
            sv += item
        sv = abs(sv - indata[prefix + 'support_vectors'])

    kernel.init(feats['train'], feats['test'])
    classified = max(
        abs(regression.apply().get_labels() - indata[prefix + 'classified']))

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               alphas=alphas,
                               bias=bias,
                               support_vectors=sv,
                               classified=classified)
示例#9
0
def _evaluate (indata):
	"""Train the distribution named in *indata* and compare its
	log-likelihood, summed log-derivatives and (for HMMs) best-path values
	against the reference data."""
	prefix='distribution_'
	feats=util.get_features(indata, prefix)

	if indata[prefix+'name']=='HMM':
		distribution=HMM(feats['train'], indata[prefix+'N'],
			indata[prefix+'M'], indata[prefix+'pseudo'])
		distribution.train()
		distribution.baum_welch_viterbi_train(BW_NORMAL)
	else:
		dfun=eval(indata[prefix+'name'])
		distribution=dfun(feats['train'])
		distribution.train()

	likelihood=distribution.get_log_likelihood_sample()
	num_examples=feats['train'].get_num_vectors()
	num_param=distribution.get_num_model_parameters()
	derivatives=0
	for i in xrange(num_param):
		for j in xrange(num_examples):
			val=distribution.get_log_derivative(i, j)
			# BUGFIX: 'val != nan' was always True (NaN compares unequal to
			# everything, including itself), so NaN values poisoned the sum.
			# 'val == val' is False exactly for NaN.
			if val!=-inf and val==val: # only consider sparse matrix!
				derivatives+=val

	derivatives=abs(derivatives-indata[prefix+'derivatives'])
	likelihood=abs(likelihood-indata[prefix+'likelihood'])

	if indata[prefix+'name']=='HMM':
		best_path=0
		best_path_state=0
		for i in xrange(indata[prefix+'num_examples']):
			best_path+=distribution.best_path(i)
			for j in xrange(indata[prefix+'N']):
				best_path_state+=distribution.get_best_path_state(i, j)

		best_path=abs(best_path-indata[prefix+'best_path'])
		best_path_state=abs(best_path_state-\
			indata[prefix+'best_path_state'])

		return util.check_accuracy(indata[prefix+'accuracy'],
			derivatives=derivatives, likelihood=likelihood,
			best_path=best_path, best_path_state=best_path_state)
	else:
		return util.check_accuracy(indata[prefix+'accuracy'],
			derivatives=derivatives, likelihood=likelihood)
示例#10
0
def _evaluate (indata, prefix):
	"""Compare the static interface's train/test distance matrices against
	the reference matrices stored in *indata*."""
	train_diff = abs(indata['distance_matrix_train'] -
		sg('get_distance_matrix', 'TRAIN'))
	dm_train = max(train_diff.flat)

	test_diff = abs(indata['distance_matrix_test'] -
		sg('get_distance_matrix', 'TEST'))
	dm_test = max(test_diff.flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		dm_train=dm_train, dm_test=dm_test)
示例#11
0
def _evaluate (indata):
	"""Train a kernel-based regression and compare its bias, alpha sum,
	support-vector index sum and classification output with references.

	Returns False when the requested regression class is unavailable in
	this build or the regression type is unknown.
	"""
	prefix='kernel_'
	feats=util.get_features(indata, prefix)
	kargs=util.get_args(indata, prefix)
	fun=eval(indata[prefix+'name']+'Kernel')
	kernel=fun(feats['train'], feats['train'], *kargs)

	prefix='regression_'
	kernel.parallel.set_num_threads(indata[prefix+'num_threads'])

	try:
		name = indata[prefix+'name']
		# Reference files still use the all-caps legacy class name.
		if (name=='KERNELRIDGEREGRESSION'):
			name = 'KernelRidgeRegression'

		rfun=eval(name)
	except NameError:
		print("%s is disabled/unavailable!"%indata[prefix+'name'])
		return False

	labels=RegressionLabels(double(indata[prefix+'labels']))
	if indata[prefix+'type']=='svm':
		regression=rfun(
			indata[prefix+'C'], indata[prefix+'epsilon'], kernel, labels)
	elif indata[prefix+'type']=='kernelmachine':
		regression=rfun(indata[prefix+'tau'], kernel, labels)
	else:
		return False

	regression.parallel.set_num_threads(indata[prefix+'num_threads'])
	if prefix+'tube_epsilon' in indata:
		regression.set_tube_epsilon(indata[prefix+'tube_epsilon'])

	regression.train()

	alphas=0
	bias=0
	sv=0
	if prefix+'bias' in indata:
		bias=abs(regression.get_bias()-indata[prefix+'bias'])
	if prefix+'alphas' in indata:
		for item in regression.get_alphas().tolist():
			alphas+=item
		alphas=abs(alphas-indata[prefix+'alphas'])
	if prefix+'support_vectors' in indata:
		# BUGFIX: was 'inregression' — a NameError at runtime.
		for item in regression.get_support_vectors().tolist():
			sv+=item
		sv=abs(sv-indata[prefix+'support_vectors'])

	kernel.init(feats['train'], feats['test'])
	classified=max(abs(
		regression.apply().get_labels()-indata[prefix+'classified']))

	return util.check_accuracy(indata[prefix+'accuracy'], alphas=alphas,
		bias=bias, support_vectors=sv, classified=classified)
示例#12
0
def _evaluate (indata, prefix):
	"""Compare static-interface clustering output (either radi/centers or
	merge distances/pairs) with the reference data; raise when neither
	variant's keys are present."""
	accuracy = indata[prefix+'accuracy']

	if prefix+'radi' in indata:
		radi, centers = sg('get_clustering')
		radi_diff = max(abs(radi.T[0] - indata[prefix+'radi']))
		center_diff = max(abs(centers - indata[prefix+'centers']).flat)
		return util.check_accuracy(accuracy,
			radi=radi_diff, centers=center_diff)

	if prefix+'merge_distance' in indata:
		merge_distances, pairs = sg('get_clustering')
		md_diff = max(abs(
			merge_distances.T[0] - indata[prefix+'merge_distance']))
		pair_diff = max(abs(pairs - indata[prefix+'pairs']).flat)
		return util.check_accuracy(accuracy,
			merge_distances=md_diff, pairs=pair_diff)

	raise StandardError('Incomplete clustering data.')
示例#13
0
def _evaluate (indata, prefix):
	"""Train the configured kernel via the static interface and compare its
	train/test kernel matrices against the reference matrices."""
	util.set_and_train_kernel(indata)

	km_train = max(abs(
		indata['kernel_matrix_train'] -
		sg('get_kernel_matrix', 'TRAIN')).flat)
	km_test = max(abs(
		indata['kernel_matrix_test'] -
		sg('get_kernel_matrix', 'TEST')).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#14
0
def _evaluate(indata, prefix):
    """Verify static-interface clustering results against reference data.

    Depending on which keys are present, compares either radi/centers or
    merge distances/pairs; raises StandardError when neither is found.
    """
    if prefix + 'radi' in indata:
        radi, centers = sg('get_clustering')
        radi_delta = max(abs(radi.T[0] - indata[prefix + 'radi']))
        centers_delta = max(abs(centers - indata[prefix + 'centers']).flat)
        return util.check_accuracy(indata[prefix + 'accuracy'],
                                   radi=radi_delta,
                                   centers=centers_delta)

    if prefix + 'merge_distance' in indata:
        merge_distances, pairs = sg('get_clustering')
        md_delta = max(abs(
            merge_distances.T[0] - indata[prefix + 'merge_distance']))
        pairs_delta = max(abs(pairs - indata[prefix + 'pairs']).flat)
        return util.check_accuracy(indata[prefix + 'accuracy'],
                                   merge_distances=md_delta,
                                   pairs=pairs_delta)

    raise StandardError('Incomplete clustering data.')
示例#15
0
def _evaluate (indata):
	"""Build the named distance on the training features and compare its
	train/test distance matrices with the reference matrices."""
	prefix = 'distance_'
	feats = util.get_features(indata, prefix)

	distance_class = eval(indata[prefix+'name'])
	dargs = util.get_args(indata, prefix)
	dist = distance_class(feats['train'], feats['train'], *dargs)

	dm_train = max(abs(
		indata[prefix+'matrix_train'] - dist.get_distance_matrix()).flat)

	dist.init(feats['train'], feats['test'])
	dm_test = max(abs(
		indata[prefix+'matrix_test'] - dist.get_distance_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		dm_train=dm_train, dm_test=dm_test)
示例#16
0
def _evaluate(indata):
    """Apply the configured preprocessor to the features, then build the
    kernel and compare its train/test matrices with the references."""
    feats = util.get_features(indata, "kernel_")
    kfun = eval(indata["kernel_name"] + "Kernel")
    kargs = util.get_args(indata, "kernel_")

    pargs = util.get_args(indata, "preproc_")
    feats = util.add_preproc(indata["preproc_name"], feats, *pargs)

    kernel = kfun(feats["train"], feats["train"], *kargs)
    km_train = max(abs(
        indata["kernel_matrix_train"] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats["train"], feats["test"])
    km_test = max(abs(
        indata["kernel_matrix_test"] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata["kernel_accuracy"],
                               km_train=km_train, km_test=km_test)
示例#17
0
def _evaluate (indata, prefix):
	"""Instantiate the named kernel (optionally with a normalizer) and
	compare its train/test kernel matrices against the references."""
	feats = util.get_features(indata, prefix)
	kernel_class = eval(indata[prefix+'name']+'Kernel')
	kernel = kernel_class(*util.get_args(indata, prefix))

	normalizer_key = prefix+'normalizer'
	if normalizer_key in indata:
		kernel.set_normalizer(eval(indata[normalizer_key]+'()'))

	kernel.init(feats['train'], feats['train'])
	km_train = max(abs(
		indata[prefix+'matrix_train'] - kernel.get_kernel_matrix()).flat)

	kernel.init(feats['train'], feats['test'])
	km_test = max(abs(
		indata[prefix+'matrix_test'] - kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#18
0
def _evaluate(indata, prefix):
    """Build the named kernel (optionally normalized) and compare its
    train/test kernel matrices against the reference matrices."""
    feats = util.get_features(indata, prefix)
    kernel = eval(indata[prefix + 'name'] + 'Kernel')(
        *util.get_args(indata, prefix))
    if prefix + 'normalizer' in indata:
        kernel.set_normalizer(eval(indata[prefix + 'normalizer'] + '()'))

    kernel.init(feats['train'], feats['train'])
    train_delta = abs(
        indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix())
    kernel.init(feats['train'], feats['test'])
    test_delta = abs(
        indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix())

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=max(train_delta.flat),
                               km_test=max(test_delta.flat))
示例#19
0
def _evaluate(indata):
    """Construct the configured distance and compare its train/test
    distance matrices with the reference matrices."""
    prefix = 'distance_'
    feats = util.get_features(indata, prefix)

    dist = eval(indata[prefix + 'name'])(
        feats['train'], feats['train'], *util.get_args(indata, prefix))

    dm_train = max(abs(
        indata[prefix + 'matrix_train'] - dist.get_distance_matrix()).flat)
    dist.init(feats['train'], feats['test'])
    dm_test = max(abs(
        indata[prefix + 'matrix_test'] - dist.get_distance_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               dm_train=dm_train,
                               dm_test=dm_test)
示例#20
0
def _evaluate (indata):
	"""Run the configured preprocessor over the features, then build the
	kernel and verify its train/test matrices against the references."""
	feats = util.get_features(indata, 'kernel_')
	kfun = eval(indata['kernel_name']+'Kernel')
	kargs = util.get_args(indata, 'kernel_')

	pargs = util.get_args(indata, 'preprocessor_')
	feats = util.add_preprocessor(indata['preprocessor_name'], feats, *pargs)

	kernel = kfun(feats['train'], feats['train'], *kargs)
	km_train = max(abs(
		indata['kernel_matrix_train'] - kernel.get_kernel_matrix()).flat)
	kernel.init(feats['train'], feats['test'])
	km_test = max(abs(
		indata['kernel_matrix_test'] - kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata['kernel_accuracy'],
		km_train=km_train, km_test=km_test)
示例#21
0
def _evaluate (indata):
	"""Compare bias, alpha sum, support-vector sum and classification of a
	trained static-interface regression against the reference values."""
	alphas = 0
	bias = 0
	sv = 0

	if 'regression_bias' in indata:
		b, weights = sg('get_svm')
		weights = weights.T
		bias = abs(b - indata['regression_bias'])
		# Column 0 holds the alphas, column 1 the support-vector indices.
		alphas = abs(sum(weights[0].tolist()) - indata['regression_alpha_sum'])
		sv = abs(sum(weights[1].tolist()) - indata['regression_sv_sum'])

	classified = max(abs(sg('classify') - indata['regression_classified']))

	return util.check_accuracy(indata['regression_accuracy'],
		alphas=alphas, bias=bias, support_vectors=sv, classified=classified)
示例#22
0
def _evaluate_auc (indata, prefix):
	"""Wrap subkernel '0' in an AUCKernel and compare its train/test
	kernel matrices against the reference matrices."""
	subk = _get_subkernels(indata, prefix)['0']
	feats_subk = util.get_features(subk, '')
	subk['kernel'].init(feats_subk['train'], feats_subk['test'])

	train_feats = WordFeatures(indata[prefix+'data_train'].astype(ushort))
	test_feats = WordFeatures(indata[prefix+'data_test'].astype(ushort))
	kernel = AUCKernel(10, subk['kernel'])

	kernel.init(train_feats, train_feats)
	km_train = max(abs(
		indata[prefix+'matrix_train'] - kernel.get_kernel_matrix()).flat)

	kernel.init(train_feats, test_feats)
	km_test = max(abs(
		indata[prefix+'matrix_test'] - kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#23
0
def _evaluate_combined (indata, prefix):
	"""Assemble a CombinedKernel from all subkernels and compare its
	train/test kernel matrices against the reference matrices."""
	kernel = CombinedKernel()
	train_feats = CombinedFeatures()
	test_feats = CombinedFeatures()

	for subk in _get_subkernels(indata, prefix).itervalues():
		feats_subk = util.get_features(subk, '')
		train_feats.append_feature_obj(feats_subk['train'])
		test_feats.append_feature_obj(feats_subk['test'])
		kernel.append_kernel(subk['kernel'])

	kernel.init(train_feats, train_feats)
	km_train = max(abs(
		indata['kernel_matrix_train'] - kernel.get_kernel_matrix()).flat)

	kernel.init(train_feats, test_feats)
	km_test = max(abs(
		indata['kernel_matrix_test'] - kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#24
0
def _evaluate_top_fisher(indata, prefix):
    """Build TOP or Fisher-kernel (FK) features from two trained HMMs and
    compare PolyKernel train/test matrices against the reference data.

    *prefix* selects the HMM parameters (``N``, ``M``, ``pseudo``) and the
    feature ``name`` ('TOP' selects TOPFeatures, anything else FKFeatures);
    returns the result of ``util.check_accuracy``.
    """
    feats = {}
    wordfeats = util.get_features(indata, prefix)

    # Train one HMM per class; note both are trained on the same 'train'
    # observations here (test-fixture behaviour).
    pos_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    # Clone the trained models and point the clones at the test observations.
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    if indata[prefix + 'name'] == 'TOP':
        feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train'] = FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1)  #estimate prior
        feats['test'] = FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(
            feats['train'].get_a())  #use prior from training data

    # From here on, parameters are read with the 'kernel_' prefix.
    prefix = 'kernel_'
    args = util.get_args(indata, prefix)
    kernel = PolyKernel(feats['train'], feats['train'], *args)
    #	kernel=PolyKernel(*args)
    #	kernel.init(feats['train'], feats['train'])
    km_train = max(
        abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(
        abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
示例#25
0
def _evaluate_auc(indata, prefix):
    """Build an AUCKernel over subkernel '0' and compare its train/test
    kernel matrices against the reference matrices."""
    subk = _get_subkernels(indata, prefix)['0']
    subk_feats = util.get_features(subk, '')
    subk['kernel'].init(subk_feats['train'], subk_feats['test'])

    train = WordFeatures(indata[prefix + 'data_train'].astype(ushort))
    test = WordFeatures(indata[prefix + 'data_test'].astype(ushort))
    kernel = AUCKernel(10, subk['kernel'])

    kernel.init(train, train)
    km_train = max(abs(
        indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(train, test)
    km_test = max(abs(
        indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
示例#26
0
File: preproc.py — Project: xfrancv/shogun
def _evaluate(indata):
    """Preprocess the features, then build the kernel and compare its
    train/test kernel matrices against the reference matrices."""
    feats = util.get_features(indata, 'kernel_')
    kernel_class = eval(indata['kernel_name'] + 'Kernel')
    kernel_args = util.get_args(indata, 'kernel_')

    preproc_args = util.get_args(indata, 'preproc_')
    feats = util.add_preproc(indata['preproc_name'], feats, *preproc_args)

    kernel = kernel_class(feats['train'], feats['train'], *kernel_args)
    km_train = max(abs(
        indata['kernel_matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(abs(
        indata['kernel_matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata['kernel_accuracy'],
                               km_train=km_train,
                               km_test=km_test)
示例#27
0
def _evaluate_combined(indata, prefix):
    """Append every subkernel and its features to a CombinedKernel and
    compare the resulting train/test kernel matrices to the references."""
    kernel = CombinedKernel()
    train = CombinedFeatures()
    test = CombinedFeatures()

    for subk in _get_subkernels(indata, prefix).itervalues():
        subk_feats = util.get_features(subk, '')
        train.append_feature_obj(subk_feats['train'])
        test.append_feature_obj(subk_feats['test'])
        kernel.append_kernel(subk['kernel'])

    kernel.init(train, train)
    km_train = max(abs(
        indata['kernel_matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(train, test)
    km_test = max(abs(
        indata['kernel_matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
示例#28
0
def _evaluate (indata, prefix):
	"""Compare classifier outputs (bias, alphas, support vectors and
	classification) from the static interface with the reference values."""
	alphas = 0
	bias = 0
	sv = 0

	# LDA exposes no bias/alphas/SVs in this form, so they stay at 0.
	if indata[prefix+'type'] != 'lda':
		has_bias = (prefix+'label_type' in indata and
			indata[prefix+'label_type'] != 'series' and
			prefix+'bias' in indata)
		if has_bias:
			b, weights = sg('get_svm')
			weights = weights.T
			bias = abs(b - indata[prefix+'bias'])

		alphas, sv = _get_alpha_and_sv(indata, prefix)

	classified = max(abs(sg('classify') - indata[prefix+'classified']))

	return util.check_accuracy(indata[prefix+'accuracy'],
		alphas=alphas, bias=bias, support_vectors=sv, classified=classified)
示例#29
0
def _evaluate_top_fisher (indata, prefix):
	"""Build TOP or Fisher-kernel (FK) features from two trained HMMs and
	compare PolyKernel train/test matrices against the reference data.

	*prefix* selects the HMM parameters (``N``, ``M``, ``pseudo``) and the
	feature ``name`` ('TOP' selects TOPFeatures, anything else FKFeatures);
	returns the result of ``util.check_accuracy``.
	"""
	feats={}
	wordfeats=util.get_features(indata, prefix)

	# Train one HMM per class; note both are trained on the same 'train'
	# observations here (test-fixture behaviour).
	pos_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	pos_train.train()
	pos_train.baum_welch_viterbi_train(BW_NORMAL)
	neg_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	neg_train.train()
	neg_train.baum_welch_viterbi_train(BW_NORMAL)
	# Clone the trained models and point the clones at the test observations.
	pos_test=HMM(pos_train)
	pos_test.set_observations(wordfeats['test'])
	neg_test=HMM(neg_train)
	neg_test.set_observations(wordfeats['test'])

	if indata[prefix+'name']=='TOP':
		feats['train']=TOPFeatures(10, pos_train, neg_train, False, False)
		feats['test']=TOPFeatures(10, pos_test, neg_test, False, False)
	else:
		feats['train']=FKFeatures(10, pos_train, neg_train)
		feats['train'].set_opt_a(-1) #estimate prior
		feats['test']=FKFeatures(10, pos_test, neg_test)
		feats['test'].set_a(feats['train'].get_a()) #use prior from training data

	# From here on, parameters are read with the 'kernel_' prefix.
	prefix='kernel_'
	args=util.get_args(indata, prefix)
	kernel=PolyKernel(feats['train'], feats['train'], *args)
#	kernel=PolyKernel(*args)
#	kernel.init(feats['train'], feats['train'])
	km_train=max(abs(
		indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat)
	kernel.init(feats['train'], feats['test'])
	km_test=max(abs(
		indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#30
0
def _evaluate_pie (indata, prefix):
	"""Train a PluginEstimate, build the matching kernel and compare the
	kernel matrices plus classifier output against the references."""
	pie = PluginEstimate()
	feats = util.get_features(indata, prefix)
	pie.set_labels(BinaryLabels(double(indata['classifier_labels'])))
	pie.set_features(feats['train'])
	pie.train()

	kernel_class = eval(indata[prefix+'name']+'Kernel')
	kernel = kernel_class(feats['train'], feats['train'], pie)
	km_train = max(abs(
		indata[prefix+'matrix_train'] - kernel.get_kernel_matrix()).flat)

	kernel.init(feats['train'], feats['test'])
	pie.set_features(feats['test'])
	km_test = max(abs(
		indata[prefix+'matrix_test'] - kernel.get_kernel_matrix()).flat)
	classified = max(abs(
		pie.apply().get_values() - indata['classifier_classified']))

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test, classified=classified)
示例#31
0
def _evaluate(indata):
    """Check static-interface regression bias, alpha sum, support-vector
    sum and classification output against the reference values."""
    alphas = 0
    bias = 0
    sv = 0

    if 'regression_bias' in indata:
        raw_bias, weights = sg('get_svm')
        weights = weights.T
        bias = abs(raw_bias - indata['regression_bias'])
        # Column 0 holds the alphas, column 1 the support-vector indices.
        alphas = abs(sum(weights[0].tolist()) -
                     indata['regression_alpha_sum'])
        sv = abs(sum(weights[1].tolist()) - indata['regression_sv_sum'])

    classified = max(abs(sg('classify') - indata['regression_classified']))

    return util.check_accuracy(indata['regression_accuracy'],
                               alphas=alphas,
                               bias=bias,
                               support_vectors=sv,
                               classified=classified)
示例#32
0
def _evaluate_custom (indata, prefix):
	"""Feed CustomKernel through its three matrix setters and compare each
	resulting kernel matrix against the corresponding reference matrix."""
	data = indata[prefix+'data']
	feats = {'train': RealFeatures(data), 'test': RealFeatures(data)}

	symdata = indata[prefix+'symdata']
	# Collect the lower triangle (y <= x) of the symmetric reference matrix.
	lowertriangle = array([symdata[(x, y)]
		for x in xrange(symdata.shape[1])
		for y in xrange(symdata.shape[0]) if y <= x])

	kernel = CustomKernel()

	kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
	triangletriangle = max(abs(
		indata[prefix+'matrix_triangletriangle'] -
		kernel.get_kernel_matrix()).flat)

	kernel.set_triangle_kernel_matrix_from_full(symdata)
	fulltriangle = max(abs(
		indata[prefix+'matrix_fulltriangle'] -
		kernel.get_kernel_matrix()).flat)

	kernel.set_full_kernel_matrix_from_full(data)
	fullfull = max(abs(
		indata[prefix+'matrix_fullfull'] - kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		triangletriangle=triangletriangle, fulltriangle=fulltriangle,
		fullfull=fullfull)
示例#33
0
def _evaluate_top_fisher (indata, prefix):
	raise NotImplementedError, 'TOP/Fisher not yet supported in static interfaces.'

	sg('new_hmm', indata[prefix+'N'], indata[prefix+'M'])
	pos=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	pos.train()
	pos.baum_welch_viterbi_train(BW_NORMAL)
	neg=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	neg.train()
	neg.baum_welch_viterbi_train(BW_NORMAL)
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats['test'])
	neg_clone.set_observations(wordfeats['test'])

	if indata[prefix+'type']=='TOP':
		feats['train']=TOPFeatures(10, pos, neg, False, False)
		feats['test']=TOPFeatures(10, pos_clone, neg_clone, False, False)
	else:
		feats['train']=FKFeatures(10, pos, neg)
		feats['train'].set_opt_a(-1) #estimate prior
		feats['test']=FKFeatures(10, pos_clone, neg_clone)
		feats['test'].set_a(feats['train'].get_a()) #use prior from training data

	prefix='kernel_'
	args=util.get_args(indata, prefix)
	kernel=PolyKernel(feats['train'], feats['train'], *args)
	km_train=max(abs(
		indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat)
	kernel.init(feats['train'], feats['test'])
	km_test=max(abs(
		indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
示例#34
0
def _evaluate_pie(indata, prefix):
    """Train a PluginEstimate-backed kernel and compare train/test kernel
    matrices and classifier confidences against the reference data."""
    feats = util.get_features(indata, prefix)
    labels = BinaryLabels(double(indata['classifier_labels']))

    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats['train'])
    pie.train()

    kernel_class = eval(indata[prefix + 'name'] + 'Kernel')
    kernel = kernel_class(feats['train'], feats['train'], pie)
    train_diff = abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix())
    km_train = max(train_diff.flat)

    kernel.init(feats['train'], feats['test'])
    pie.set_features(feats['test'])
    test_diff = abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix())
    km_test = max(test_diff.flat)
    classified = max(
        abs(pie.apply().get_confidences() - indata['classifier_classified']))

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test,
                               classified=classified)
示例#35
0
    alphas = 0
    bias = 0
    sv = 0
    if indata.has_key(prefix + "bias"):
        bias = abs(regression.get_bias() - indata[prefix + "bias"])
    if indata.has_key(prefix + "alphas"):
        for item in regression.get_alphas().tolist():
            alphas += item
        alphas = abs(alphas - indata[prefix + "alphas"])
    if indata.has_key(prefix + "support_vectors"):
        for item in inregression.get_support_vectors().tolist():
            sv += item
        sv = abs(sv - indata[prefix + "support_vectors"])

    kernel.init(feats["train"], feats["test"])
    classified = max(abs(regression.apply().get_labels() - indata[prefix + "classified"]))

    return util.check_accuracy(
        indata[prefix + "accuracy"], alphas=alphas, bias=bias, support_vectors=sv, classified=classified
    )


########################################################################
# public
########################################################################


def test(indata):
    """Public entry point: run the evaluation for *indata* and report success."""
    result = _evaluate(indata)
    return result
示例#36
0
def _evaluate (indata):
	"""Build, configure and train the classifier described by *indata* and
	compare its results against the stored reference values.

	Returns False when the classifier class is unavailable or the test type
	is unknown; otherwise the util.check_accuracy() verdict.
	"""
	prefix='classifier_'
	ctype=indata[prefix+'type']
	# KNN works on distance features, kernel machines on kernel features
	if indata[prefix+'name']=='KNN':
		feats=util.get_features(indata, 'distance_')
	elif ctype=='kernel':
		feats=util.get_features(indata, 'kernel_')
	else:
		feats=util.get_features(indata, prefix)

	machine=_get_machine(indata, prefix, feats)

	try:
		fun=eval(indata[prefix+'name'])
	except NameError as e:
		print("%s is disabled/unavailable!"%indata[prefix+'name'])
		return False

	# cannot refactor into function, because labels is unrefed otherwise
	if prefix+'labels' in indata:
		labels=BinaryLabels(double(indata[prefix+'labels']))
		if ctype=='kernel':
			classifier=fun(indata[prefix+'C'], machine, labels)
		elif ctype=='linear':
			classifier=fun(indata[prefix+'C'], feats['train'], labels)
		elif ctype=='knn':
			classifier=fun(indata[prefix+'k'], machine, labels)
		elif ctype=='lda':
			classifier=fun(indata[prefix+'gamma'], feats['train'], labels)
		elif ctype=='perceptron':
			classifier=fun(feats['train'], labels)
		elif ctype=='wdsvmocas':
			classifier=fun(indata[prefix+'C'], indata[prefix+'degree'],
				indata[prefix+'degree'], feats['train'], labels)
		else:
			return False
	else:
		classifier=fun(indata[prefix+'C'], machine)

	if classifier.get_name() == 'LibLinear':
		print(classifier.get_name(), "yes")
		classifier.set_liblinear_solver_type(L2R_LR)

	classifier.parallel.set_num_threads(indata[prefix+'num_threads'])
	if ctype=='linear':
		# enable the bias term only when the reference data provides one
		classifier.set_bias_enabled(prefix+'bias' in indata)
	if ctype=='perceptron':
		# BUG FIX: these were attribute assignments that clobbered the bound
		# methods, so the settings were never actually applied
		classifier.set_learn_rate(indata[prefix+'learn_rate'])
		classifier.set_max_iter(indata[prefix+'max_iter'])
	if prefix+'epsilon' in indata:
		try:
			classifier.set_epsilon(indata[prefix+'epsilon'])
		except AttributeError:
			# not every classifier exposes an epsilon setting; best-effort
			pass
	if prefix+'max_train_time' in indata:
		classifier.set_max_train_time(indata[prefix+'max_train_time'])
	if prefix+'linadd_enabled' in indata:
		classifier.set_linadd_enabled(indata[prefix+'linadd_enabled'])
	if prefix+'batch_enabled' in indata:
		classifier.set_batch_computation_enabled(indata[prefix+'batch_enabled'])

	classifier.train()

	res=_get_results(indata, prefix, classifier, machine, feats)
	return util.check_accuracy(res['accuracy'],
		alphas=res['alphas'], bias=res['bias'], sv=res['sv'],
		classified=res['classified'])
示例#37
0
	if ctype=='perceptron':
		classifier.set_learn_rate=indata[prefix+'learn_rate']
		classifier.set_max_iter=indata[prefix+'max_iter']
	if indata.has_key(prefix+'epsilon'):
		try:
			classifier.set_epsilon(indata[prefix+'epsilon'])
		except AttributeError:
			pass
	if indata.has_key(prefix+'max_train_time'):
		classifier.set_max_train_time(indata[prefix+'max_train_time'])
	if indata.has_key(prefix+'linadd_enabled'):
		classifier.set_linadd_enabled(indata[prefix+'linadd_enabled'])
	if indata.has_key(prefix+'batch_enabled'):
		classifier.set_batch_computation_enabled(indata[prefix+'batch_enabled'])

	classifier.train()

	res=_get_results(indata, prefix, classifier, machine, feats)
	return util.check_accuracy(res['accuracy'],
		alphas=res['alphas'], bias=res['bias'], sv=res['sv'],
		classified=res['classified'])


########################################################################
# public
########################################################################

def test (indata):
	"""Public entry point: run the evaluation for *indata* and report success."""
	result=_evaluate(indata)
	return result

示例#38
0
    # Optional solver settings: apply each one only when the reference data
    # provides a value for it.
    if indata.has_key(prefix + 'epsilon'):
        try:
            classifier.set_epsilon(indata[prefix + 'epsilon'])
        except AttributeError:
            # not every classifier exposes an epsilon setting; best-effort
            pass
    if indata.has_key(prefix + 'max_train_time'):
        classifier.set_max_train_time(indata[prefix + 'max_train_time'])
    if indata.has_key(prefix + 'linadd_enabled'):
        classifier.set_linadd_enabled(indata[prefix + 'linadd_enabled'])
    if indata.has_key(prefix + 'batch_enabled'):
        classifier.set_batch_computation_enabled(indata[prefix +
                                                        'batch_enabled'])

    classifier.train()

    # compare trained-classifier results against the stored reference values
    res = _get_results(indata, prefix, classifier, machine, feats)
    return util.check_accuracy(res['accuracy'],
                               alphas=res['alphas'],
                               bias=res['bias'],
                               sv=res['sv'],
                               classified=res['classified'])


########################################################################
# public
########################################################################


def test(indata):
    """Public entry point: run the evaluation for *indata* and report success."""
    result = _evaluate(indata)
    return result
示例#39
0
    if ctype == "perceptron":
        classifier.set_learn_rate = indata[prefix + "learn_rate"]
        classifier.set_max_iter = indata[prefix + "max_iter"]
    if indata.has_key(prefix + "epsilon"):
        try:
            classifier.set_epsilon(indata[prefix + "epsilon"])
        except AttributeError:
            pass
    if indata.has_key(prefix + "max_train_time"):
        classifier.set_max_train_time(indata[prefix + "max_train_time"])
    if indata.has_key(prefix + "linadd_enabled"):
        classifier.set_linadd_enabled(indata[prefix + "linadd_enabled"])
    if indata.has_key(prefix + "batch_enabled"):
        classifier.set_batch_computation_enabled(indata[prefix + "batch_enabled"])

    classifier.train()

    res = _get_results(indata, prefix, classifier, machine, feats)
    return util.check_accuracy(
        res["accuracy"], alphas=res["alphas"], bias=res["bias"], sv=res["sv"], classified=res["classified"]
    )


########################################################################
# public
########################################################################


def test(indata):
    """Public entry point: run the evaluation for *indata* and report success."""
    result = _evaluate(indata)
    return result
示例#40
0
    sv = 0
    if indata.has_key(prefix + 'bias'):
        bias = abs(regression.get_bias() - indata[prefix + 'bias'])
    if indata.has_key(prefix + 'alphas'):
        for item in regression.get_alphas().tolist():
            alphas += item
        alphas = abs(alphas - indata[prefix + 'alphas'])
    if indata.has_key(prefix + 'support_vectors'):
        for item in inregression.get_support_vectors().tolist():
            sv += item
        sv = abs(sv - indata[prefix + 'support_vectors'])

    kernel.init(feats['train'], feats['test'])
    classified = max(
        abs(regression.apply().get_labels() - indata[prefix + 'classified']))

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               alphas=alphas,
                               bias=bias,
                               support_vectors=sv,
                               classified=classified)


########################################################################
# public
########################################################################


def test(indata):
    """Public entry point: run the evaluation for *indata* and report success."""
    result = _evaluate(indata)
    return result
示例#41
0
def _evaluate(indata):
    """Build, configure and train the classifier described by *indata* and
    compare its results against the stored reference values.

    Returns False when the classifier class is unavailable or the test type
    is unknown; otherwise the util.check_accuracy() verdict.
    """
    prefix = 'classifier_'
    ctype = indata[prefix + 'type']
    # KNN works on distance features, kernel machines on kernel features
    if indata[prefix + 'name'] == 'KNN':
        feats = util.get_features(indata, 'distance_')
    elif ctype == 'kernel':
        feats = util.get_features(indata, 'kernel_')
    else:
        feats = util.get_features(indata, prefix)

    machine = _get_machine(indata, prefix, feats)

    try:
        fun = eval(indata[prefix + 'name'])
    except NameError as e:
        print("%s is disabled/unavailable!" % indata[prefix + 'name'])
        return False

    # cannot refactor into function, because labels is unrefed otherwise
    if prefix + 'labels' in indata:
        labels = BinaryLabels(double(indata[prefix + 'labels']))
        if ctype == 'kernel':
            classifier = fun(indata[prefix + 'C'], machine, labels)
        elif ctype == 'linear':
            classifier = fun(indata[prefix + 'C'], feats['train'], labels)
        elif ctype == 'knn':
            classifier = fun(indata[prefix + 'k'], machine, labels)
        elif ctype == 'lda':
            classifier = fun(indata[prefix + 'gamma'], feats['train'], labels)
        elif ctype == 'perceptron':
            classifier = fun(feats['train'], labels)
        elif ctype == 'wdsvmocas':
            classifier = fun(indata[prefix + 'C'], indata[prefix + 'degree'],
                             indata[prefix + 'degree'], feats['train'], labels)
        else:
            return False
    else:
        classifier = fun(indata[prefix + 'C'], machine)

    if classifier.get_name() == 'LibLinear':
        print(classifier.get_name(), "yes")
        classifier.set_liblinear_solver_type(L2R_LR)

    classifier.parallel.set_num_threads(indata[prefix + 'num_threads'])
    if ctype == 'linear':
        # enable the bias term only when the reference data provides one
        classifier.set_bias_enabled(prefix + 'bias' in indata)
    if ctype == 'perceptron':
        # BUG FIX: these were attribute assignments that clobbered the bound
        # methods, so the settings were never actually applied
        classifier.set_learn_rate(indata[prefix + 'learn_rate'])
        classifier.set_max_iter(indata[prefix + 'max_iter'])
    if prefix + 'epsilon' in indata:
        try:
            classifier.set_epsilon(indata[prefix + 'epsilon'])
        except AttributeError:
            # not every classifier exposes an epsilon setting; best-effort
            pass
    if prefix + 'max_train_time' in indata:
        classifier.set_max_train_time(indata[prefix + 'max_train_time'])
    if prefix + 'linadd_enabled' in indata:
        classifier.set_linadd_enabled(indata[prefix + 'linadd_enabled'])
    if prefix + 'batch_enabled' in indata:
        classifier.set_batch_computation_enabled(indata[prefix +
                                                        'batch_enabled'])

    classifier.train()

    res = _get_results(indata, prefix, classifier, machine, feats)
    return util.check_accuracy(res['accuracy'],
                               alphas=res['alphas'],
                               bias=res['bias'],
                               sv=res['sv'],
                               classified=res['classified'])