Пример #1
0
    def lrp(self, R, *args, **kwargs):
        """Forward the incoming relevance unchanged, after dumping it to disk.

        No relevance decomposition is performed in this layer: ``R`` is
        written to ``../r_array/softmax.npy`` through ``data_io.write`` and
        then returned as-is, so the LRP sub-routines are never invoked.

        Extra positional and keyword arguments are accepted and ignored.
        """
        dump_path = '../r_array/softmax.npy'
        data_io.write(R, dump_path)
        return R
Пример #2
0
    def lrp(self, R, *args, **kwargs):
        """Reshape the relevance back to ``self.inputshape`` and dump it to disk.

        The relevance itself is only propagated (no decomposition happens
        here). Which debug file is written depends on the third axis of the
        reshaped relevance:

        * size 1  -> ``../r_array/flat_4.npy`` holding two columns, the
          incoming relevance next to the reshaped one (both forced to
          ``[10, 1]``, so this path assumes exactly 10 values),
        * otherwise -> ``../r_array/flatten.npy`` holding the reshaped array.

        Returns the relevance reshaped to ``self.inputshape``.
        """
        restored = np.reshape(R, self.inputshape)

        if restored.shape[2] != 1:
            data_io.write(restored, '../r_array/flatten.npy')
        else:
            incoming_col = np.reshape(R, [10, 1])
            restored_col = np.reshape(restored, [10, 1])
            side_by_side = np.concatenate((incoming_col, restored_col), axis=1)
            data_io.write(side_by_side, '../r_array/flat_4.npy')

        return restored
Пример #3
0
    def _simple_lrp_slow(self, R):
        """Redistribute relevance through the pooling layer, window by window.

        For every output position ``(i, j)`` the inputs inside the matching
        pooling window that equal the pooled output ``self.Y`` share the
        relevance ``R[:, i, j, :]`` equally (ties split the relevance).

        Side effects: after each window the running result is written to
        ``../r_array/maxpool<i>_Hout_<j>_Wout.npy`` and the final result to
        ``../r_array/maxpool.npy`` via ``data_io.write``.

        Args:
            R: relevance of the layer output; indexed as ``[:, i, j, :]``.

        Returns:
            Array shaped like ``self.X`` (float dtype) holding the input
            relevance.
        """
        _, H, W, _ = self.X.shape

        hpool, wpool = self.pool
        hstride, wstride = self.stride

        # Assumes the pooling and stride parameters were chosen to tile the
        # input properly.
        Hout = (H - hpool) // hstride + 1
        Wout = (W - wpool) // wstride + 1

        # ``np.float`` was only an alias of the builtin and has been removed
        # from recent NumPy releases; the builtin works on every version.
        Rx = np.zeros_like(self.X, dtype=float)

        for i in range(Hout):
            rows = slice(i * hstride, i * hstride + hpool)
            for j in range(Wout):
                cols = slice(j * wstride, j * wstride + wpool)

                # Mask of the window entries that produced the pooled value.
                Z = self.Y[:, i:i + 1, j:j + 1, :] == self.X[:, rows, cols, :]
                # Number of such entries per window; float so that Z / Zs is
                # a true division (thanks user wodtko for reporting this
                # bug/fix).
                Zs = Z.sum(axis=(1, 2), keepdims=True, dtype=float)
                Rx[:, rows, cols, :] += (Z / Zs) * R[:, i:i + 1, j:j + 1, :]

                Rfile = '../r_array/maxpool' + str(i) + '_Hout_' + str(
                    j) + '_Wout' + '.npy'
                data_io.write(Rx, Rfile)

        data_io.write(Rx, '../r_array/maxpool.npy')
        return Rx
Пример #4
0
 def lrp(self, R, *args, **kwargs):
     """Hand the relevance on untouched, after saving a copy to disk.

     This layer works component-wise, so there is nothing to redistribute:
     ``R`` is written to ``../r_array/tanh.npy`` with ``data_io.write`` and
     returned unchanged, which also keeps the LRP sub-routines from being
     called. Additional arguments are ignored.
     """
     tanh_dump = '../r_array/tanh.npy'
     data_io.write(R, tanh_dump)
     return R
Пример #5
0
def baseline(output_dir, basename, valid_num, test_num, target_num):
    """Write all-zero validation and test prediction files for cycle 0.

    Two arrays of zeros, shaped ``[valid_num, target_num]`` and
    ``[test_num, target_num]``, are written with ``data_io.write`` to
    ``<output_dir>/<basename>_valid_000.predict`` and
    ``<output_dir>/<basename>_test_000.predict``.
    """
    outputs = (
        ('_valid_', np.zeros([valid_num, target_num])),
        ('_test_', np.zeros([test_num, target_num])),
    )

    # Only cycle 0 is ever produced; zero-padded to three digits.
    suffix = str(0).zfill(3) + '.predict'
    for tag, zeros in outputs:
        data_io.write(os.path.join(output_dir, basename + tag + suffix), zeros)
Пример #6
0
def write_all_zeros(output_dir, basename, valid_num, test_num, target_num):
    """Fallback output: write zero predictions so that result files exist.

    Meant for the case where something broke. Produces
    ``<basename>_valid_000.predict`` (``valid_num`` x ``target_num`` zeros)
    and ``<basename>_test_000.predict`` (``test_num`` x ``target_num``
    zeros) inside ``output_dir`` via ``data_io.write``.
    """
    zero_valid = np.zeros([valid_num, target_num])
    zero_test = np.zeros([test_num, target_num])

    def _target(split):
        # File name pattern: <basename>_<split>_<3-digit cycle>.predict
        name = basename + '_' + split + '_' + str(0).zfill(3) + '.predict'
        return os.path.join(output_dir, name)

    data_io.write(_target('valid'), zero_valid)
    data_io.write(_target('test'), zero_test)
Пример #7
0
def write_all_zeros(output_dir, basename, valid_num, test_num, target_num):
    """Emit all-zero prediction files, used when something went wrong.

    Writes ``[valid_num, target_num]`` zeros to
    ``<output_dir>/<basename>_valid_000.predict`` and
    ``[test_num, target_num]`` zeros to
    ``<output_dir>/<basename>_test_000.predict`` using ``data_io.write``.
    """
    valid_zeros = np.zeros([valid_num, target_num])
    test_zeros = np.zeros([test_num, target_num])

    # The cycle index is fixed at 0 and rendered as '000'.
    tail = str(0).zfill(3) + '.predict'
    valid_path = os.path.join(output_dir, basename + '_valid_' + tail)
    test_path = os.path.join(output_dir, basename + '_test_' + tail)

    data_io.write(valid_path, valid_zeros)
    data_io.write(test_path, test_zeros)
Пример #8
0
 def forward(self, X, *args, **kwargs):
     """Apply the rectification ``max(0, X)`` and dump input and output.

     The result is cached on ``self.Y`` and returned. For debugging, two
     files are written with ``data_io.write``; their names embed the first
     three dimensions of the respective array, so ``X`` must have at least
     three axes:

     * ``../r_array/Rect_input(a,b,c).npy`` -- the layer input ``X``
     * ``../r_array/Rect_out(a,b,c).npy``   -- the rectified output

     Extra positional and keyword arguments are ignored.
     """
     self.Y = np.maximum(0, X)

     input_check = '../r_array/Rect_input({},{},{}).npy'.format(
         X.shape[0], X.shape[1], X.shape[2])
     # The input dump must hold X itself; previously the rectified output
     # was written here as well, making both files identical.
     data_io.write(X, input_check)

     output_check = '../r_array/Rect_out({},{},{}).npy'.format(
         self.Y.shape[0], self.Y.shape[1], self.Y.shape[2])
     data_io.write(self.Y, output_check)

     return self.Y
Пример #9
0
    def _epsilon_lrp(self,R,epsilon):
        '''
        LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

        The denominator is stabilized by pushing the layer output self.Y away
        from zero by epsilon, in the direction of its sign (zero counts as
        positive).

        If the forward pass was lrp-aware, the cached self.Z is used and the
        result is returned directly. Otherwise the localized preactivations
        are rebuilt from self.W and self.X, and the resulting relevance is
        additionally written to '../r_array/linear.npy' via data_io.write.

        R is the relevance of the layer output; the returned array is the
        relevance summed over axis 2 (presumably the output-neuron axis --
        TODO confirm against the layer's forward pass).
        '''

        Zs = self.Y + epsilon * ((self.Y >= 0)*2-1) #prepare stabilized denominator

        # Has the forward pass been computed lrp-aware?
        # This exchanges time spent in the forward pass for lower LRP time
        # and is useful, if e.g. several parameter settings for LRP need to be evaluated
        # for the same input data.
        if self.lrp_aware:
            return (self.Z * (R/Zs)[:,na,:]).sum(axis=2)

        Z = self.W[na,:,:]*self.X[:,:,na] #localized preactivations
        # Computed once and reused for both the debug dump and the return
        # value (the contraction used to be evaluated twice).
        Rx = (Z * (R/Zs)[:,na,:]).sum(axis=2)
        data_io.write(Rx,'../r_array/linear.npy')
        return Rx
Пример #10
0
def predict(datanames, input_dir):
    """Run AutoML prediction for every dataset and write the result files.

    For each name in ``datanames`` the dataset is loaded with
    ``DataManager``, ``AutoML(...).run_predict`` is given 80% of the time
    budget announced in the dataset info, and every entry of ``aml._Y`` is
    written to ``<CONFIG['root_dir']>/res`` as
    ``<basename>_valid_<iii>.predict`` / ``<basename>_test_<iii>.predict``.

    Args:
        datanames: iterable of dataset base names (strings).
        input_dir: directory handed to ``DataManager``.

    Returns:
        Always ``True``.
    """
    res_dir = os.path.join(CONFIG['root_dir'], "res")

    for basename in datanames:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("\n*** Processing dataset %s" % basename.upper())
        start = time.time()

        D = DataManager(basename,
                        input_dir,
                        replace_missing=False,
                        filter_features=False,
                        verbose=False)

        # Keep a 20% safety margin on this dataset's allocated time.
        time_budget = int(0.8 * D.info['time_budget'])

        read_time = time.time() - start
        ts = time.time()

        aml = AutoML(D, CONFIG)
        aml.run_predict(time_budget)

        run_time = time.time() - ts
        end = time.time()

        print("* Time:: budget=%5.2f, load=%5.2f, run=%5.2f, remaining=%5.2f"
              % (time_budget, read_time, run_time, time_budget - (end - start)))

        # aml._Y is indexed by position on purpose (its container type is not
        # visible here), exactly as the original code did.
        for i, _ in enumerate(aml._Y):
            cycle = str(i).zfill(3)
            filename = basename + "_valid_" + cycle + ".predict"
            data_io.write(
                os.path.join(res_dir, filename), aml._Y[i]['Y_valid'])
            filename = basename + "_test_" + cycle + ".predict"
            data_io.write(
                os.path.join(res_dir, filename), aml._Y[i]['Y_test'])

    return True
Пример #11
0
    def lrp(self, R, *args, **kwargs):
        """Pass the relevance through unchanged while dumping debug files.

        This layer works component-wise, so ``R`` is simply propagated
        further down and the LRP sub-routines are never called. Three files
        are written with ``data_io.write``:

        * a side-by-side array (zeros next to ``R``) whose layout and file
          name depend on ``R.shape[2]``:
          2 -> ``[10, 40]`` halves in ``../r_array/rec_3.npy``,
          10 -> ``[10, 250]`` halves in ``../r_array/rec_2.npy``,
          anything else -> ``[28, 280]`` halves in ``../r_array/rec_1.npy``
          (the reshape sizes are hard-coded, so ``R`` must hold exactly that
          many values);
        * ``../r_array/rect.npy`` with ``R`` itself;
        * ``../r_array/rect_lrp(a,b,c).npy`` with ``R`` again, named after
          its first three dimensions.

        Returns ``R`` unchanged.
        """
        # Builtin float instead of the removed ``np.float`` alias.
        # NOTE(review): the left half of the dump is always zeros -- it looks
        # like a placeholder; confirm that this is what was intended.
        blank = np.zeros_like(R, dtype=float)

        depth = R.shape[2]
        if depth == 2:
            half_shape, pair_file = [10, 40], '../r_array/rec_3.npy'
        elif depth == 10:
            half_shape, pair_file = [10, 250], '../r_array/rec_2.npy'
        else:
            half_shape, pair_file = [28, 280], '../r_array/rec_1.npy'

        paired = np.concatenate(
            (np.reshape(blank, half_shape), np.reshape(R, half_shape)), axis=1)
        data_io.write(paired, pair_file)

        data_io.write(R, '../r_array/rect.npy')

        lrp_check = '../r_array/rect_lrp({},{},{}).npy'.format(
            R.shape[0], R.shape[1], R.shape[2])
        data_io.write(R, lrp_check)

        return R
Пример #12
0
    # Report the shape of the initial relevance handed to the network.
    print("Lrp R shape {} : ".format(Rinit.shape))
    #compute first layer relevance according to prediction
    #R = nn.lrp(Rinit)                   #as Eq(56) from DOI: 10.1371/journal.pone.0130140
    # Epsilon-LRP variant with parameter 1.0.
    R = nn.lrp(Rinit,'epsilon',1.)

    # Collapse axis 3 of both the relevance and the input; the input is
    # first mapped by (x+1)/2 (presumably from [-1, 1] to [0, 1] -- confirm
    # against the preprocessing done before this fragment).
    R = R.sum(axis=3)
    xs = ((x+1.)/2.).sum(axis=3)

    # When `np` is bound to something other than numpy (presumably cupy --
    # TODO confirm), move the arrays to host memory before rendering.
    if not np == numpy: 
        xs = np.asnumpy(xs)
        R = np.asnumpy(R)

    # Render the input and the heatmap, save the image and the raw arrays.
    digit = render.digit_to_rgb(xs, scaling = 3)
    hm = render.hm_to_rgb(R, X = xs, scaling = 3, sigma = 2)
    digit_hm = render.save_image([digit,hm],'../heatmap.png')
    data_io.write(R,'../heatmap.npy')
    data_io.write(xs,'../xs.npy')
    print(xs.shape)
    y = xs
    # Reload relevance dumps written elsewhere and flatten each of them to
    # (height*width, columns). The dumps are assumed to be 4-D for b and c,
    # since they are padded with a 4-axis pad specification.
    a = np.load('../r_array/convolution.npy')
    a = np.reshape(a,[a.shape[1]*a.shape[2],1])
    b = np.load('../r_array/rect.npy')    
    # Pad axes 1 and 2 by two entries on each side (np.pad is called without
    # an explicit mode, so the NumPy default applies).
    b = np.pad(b,((0,0),(2,2),(2,2),(0,0)))
    b = np.reshape(b,[b.shape[1]*b.shape[2],b.shape[0]*b.shape[3]])
    c = np.load('../r_array/sumpoll.npy')
    c = np.pad(c,((0,0),(2,2),(2,2),(0,0)))
    c = np.reshape(c,[c.shape[1]*c.shape[2],c.shape[3]])
    
    # Column-wise concatenation: a | b | c, plus the flattened input in y.
    new_b = np.hstack((b, c))
    new = np.hstack((a, new_b))
    y = np.reshape(y, [y.shape[0]*y.shape[1]*y.shape[2]])
Пример #13
0
def predict(LD, output_dir, basename):
    """Fit a dataset-specific XGBoost classifier and write its predictions.

    The hyper-parameters are selected by ``LD.info['name']`` ('albert',
    'dilbert', 'fabert', 'robert' or 'volkert'). For 'volkert' the feature
    matrices are first extended with the 800 most important polynomial
    features built from the 50 most important raw features (importance
    taken from a random-forest regressor fitted on the training data).

    Class probabilities for the validation and test sets are clipped to
    [0, 1] and written with ``data_io.write`` to
    ``<output_dir>/<basename>_valid_000.predict`` and
    ``<output_dir>/<basename>_test_000.predict``. When
    ``LD.info['target_num'] == 1`` only column 1 of the probabilities is kept.

    Args:
        LD: data holder with ``LD.data`` ('X_train', 'Y_train', 'X_valid',
            'X_test') and ``LD.info`` ('name', 'target_num'). The training
            entries of ``LD.data`` are replaced by a shuffled copy.
        output_dir: directory receiving the prediction files.
        basename: prefix of the prediction file names.

    Raises:
        ValueError: if no model is configured for ``LD.info['name']``
            (previously this surfaced as an unbound ``model`` variable).
    """
    import os

    import numpy as np
    import xgboost as xgb
    from sklearn import preprocessing, ensemble
    from sklearn.utils import shuffle

    import data_io

    # Settings shared by every dataset.
    common = dict(silent=True, max_delta_step=0, subsample=1,
                  colsample_bytree=1, base_score=0.5, seed=0, missing=None)
    # Settings that differ per dataset.
    per_dataset = {
        'albert': dict(max_depth=6, learning_rate=0.05, n_estimators=1800,
                       objective='binary:logistic', nthread=6, gamma=0.6,
                       min_child_weight=0.7),
        'dilbert': dict(max_depth=4, learning_rate=0.1, n_estimators=1000,
                        objective='multi:softprob', nthread=-1, gamma=0,
                        min_child_weight=0),
        'fabert': dict(max_depth=6, learning_rate=0.1, n_estimators=1200,
                       objective='multi:softprob', nthread=-1, gamma=0,
                       min_child_weight=1),
        'robert': dict(max_depth=6, learning_rate=0.1, n_estimators=600,
                       objective='multi:softprob', nthread=-1, gamma=0,
                       min_child_weight=1),
        'volkert': dict(max_depth=6, learning_rate=0.1, n_estimators=350,
                        objective='multi:softprob', nthread=-1, gamma=0,
                        min_child_weight=1),
    }

    LD.data['X_train'], LD.data['Y_train'] = shuffle(
        LD.data['X_train'], LD.data['Y_train'], random_state=1)

    Y_train = LD.data['Y_train']
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']

    dataset = LD.info['name']
    if dataset not in per_dataset:
        raise ValueError(
            "predict: no model configured for dataset %r" % (dataset,))

    if dataset == 'volkert':
        def most_important(forest, count):
            # Column indices of the `count` most important features,
            # most important first.
            return forest.feature_importances_.argsort()[-count:][::-1]

        poly = preprocessing.PolynomialFeatures()
        ranker = ensemble.RandomForestRegressor(
            n_estimators=24, n_jobs=-1, random_state=0, verbose=1)

        Xta = np.copy(X_train)
        Xtv = np.copy(X_valid)
        Xts = np.copy(X_test)

        # Stage 1: keep the 50 most important raw features.
        ranker.fit(Xta, Y_train)
        keep = most_important(ranker, 50)
        Xta, Xtv, Xts = Xta[:, keep], Xtv[:, keep], Xts[:, keep]

        # Stage 2: polynomial expansion; the expansion depends only on the
        # number of columns, so it is fitted once on the training part.
        Xta = poly.fit_transform(Xta)
        Xtv = poly.transform(Xtv)
        Xts = poly.transform(Xts)

        # Stage 3: keep the 800 most important expanded features.
        ranker.fit(Xta, Y_train)
        keep = most_important(ranker, 800)
        Xta, Xtv, Xts = Xta[:, keep], Xtv[:, keep], Xts[:, keep]

        X_train = np.hstack([X_train, Xta])
        X_valid = np.hstack([X_valid, Xtv])
        X_test = np.hstack([X_test, Xts])

    settings = dict(common)
    settings.update(per_dataset[dataset])
    model = xgb.XGBClassifier(**settings)
    model.fit(X_train, Y_train)

    preds_valid = model.predict_proba(X_valid)
    preds_test = model.predict_proba(X_test)

    if LD.info['target_num'] == 1:
        # Single target: keep only column 1 of the probabilities.
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)

    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'),
                  preds_valid)
    data_io.write(os.path.join(output_dir, basename + '_test_000.predict'),
                  preds_test)
Пример #14
0
def predict(LD, output_dir, basename):
    """Train a small Lasagne network and write validation/test predictions.

    Pipeline: shuffle the training data, append 27 PCA components to the
    raw features, standardize, then train a two-hidden-layer network
    (2 x 500 sigmoid units, softmax output) for 150 epochs with Adam on
    ``2 * categorical cross-entropy + multiclass hinge loss``.

    The network output is clipped to [0, 1] and written with
    ``data_io.write`` to ``<output_dir>/<basename>_valid_000.predict`` and
    ``<output_dir>/<basename>_test_000.predict``. When
    ``LD.info['target_num'] == 1`` only column 1 of the output is kept.

    Args:
        LD: data holder with ``LD.data`` ('X_train', 'Y_train', 'X_valid',
            'X_test') and ``LD.info`` ('target_num'). The training entries
            of ``LD.data`` are replaced by a shuffled copy.
        output_dir: directory receiving the prediction files.
        basename: prefix of the prediction file names.
    """
    import os
    import random

    import numpy as np
    import lasagne
    import theano
    import theano.tensor as T
    from sklearn import preprocessing, decomposition
    from sklearn.utils import shuffle

    import data_converter
    import data_io

    np.random.seed(0)
    random.seed(0)

    LD.data['X_train'], LD.data['Y_train'] = shuffle(
        LD.data['X_train'], LD.data['Y_train'], random_state=1)
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']

    # Append 27 principal components (fitted on the training set only).
    fs = decomposition.PCA(n_components=27)
    fs.fit(X_train)
    X_train = np.hstack([X_train, fs.transform(X_train)])
    X_valid = np.hstack([X_valid, fs.transform(X_valid)])
    X_test = np.hstack([X_test, fs.transform(X_test)])

    # Standardize with training-set statistics and cast for Theano.
    normx = preprocessing.StandardScaler(with_mean=True)
    normx.fit(X_train)
    X_train = np.float32(normx.transform(X_train))
    X_valid = np.float32(normx.transform(X_valid))
    X_test = np.float32(normx.transform(X_test))

    # Targets: converted with data_converter.convert_to_bin (presumably to an
    # indicator matrix); if that conversion fails, use the labels as
    # given (best effort, as before -- but no longer swallowing
    # KeyboardInterrupt/SystemExit).
    try:
        y_train = np.array(data_converter.convert_to_bin(
            LD.data['Y_train'], len(np.unique(LD.data['Y_train'])), False))
        y_train = np.int16(y_train)
    except Exception:
        y_train = np.copy(LD.data['Y_train'])

    def batches(X, y, csize, rs):
        """Yield shuffled full-size mini-batches; a short tail is dropped."""
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, len(X) - csize + 1, csize):
            yield X[cstart:cstart + csize], y[cstart:cstart + csize]

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')

    l_in = lasagne.layers.InputLayer(shape=(None, X_train.shape[1]),
                                     input_var=input_var,
                                     nonlinearity=None,)

    l_hid1 = lasagne.layers.DenseLayer(
        l_in, num_units=500,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.Sparse())

    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1, num_units=500,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.GlorotUniform())

    # NOTE(review): the output width is hard-coded; it presumably has to
    # match the number of target columns of the dataset -- confirm.
    Lnum_out_units = 100

    l_out = lasagne.layers.DenseLayer(
        l_hid2, num_units=Lnum_out_units,
        nonlinearity=lasagne.nonlinearities.softmax)

    prediction = lasagne.layers.get_output(l_out)

    loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var).mean()
    loss2 = lasagne.objectives.multiclass_hinge_loss(
        prediction, target_var, delta=0.6).mean()
    loss = loss * 2 + loss2

    params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=0.0002,
                                   beta1=0.95, beta2=0.999)
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates)

    for epoch in range(150):
        # The batch size grows by one every ten epochs; the epoch number
        # seeds the per-epoch shuffle.
        for Xt, yt in batches(X_train, y_train, 60 + epoch // 10, epoch):
            train_fn(Xt, yt)

    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)

    preds_valid = f2(X_valid)
    preds_test = f2(X_test)

    if LD.info['target_num'] == 1:
        # Single target: keep only column 1 of the network output.
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)

    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_test), preds_test)
Пример #15
0
def predict(LD, output_dir, basename):
    """Train the dataset-specific model(s) and write their predictions.

    The modelling recipe is selected by ``LD.info['name']``:

    * 'alexis'   -- average of two random forests; column 1 of every
      per-output probability block is kept and the result transposed.
    * 'dionis'   -- sum of five random forests with different seeds.
    * 'grigoris' -- per target column, average of three random forests and
      an L1 logistic regression.
    * 'jannis'   -- rows of class 0 are appended 18 more times and rows of
      class 2 once more, then one binary XGBoost model is trained per class.
    * 'wallis'   -- average of multinomial naive Bayes and XGBoost.

    Predictions are clipped to [0, 1] and written with ``data_io.write`` to
    ``<output_dir>/<basename>_valid_000.predict`` and
    ``<output_dir>/<basename>_test_000.predict``. When
    ``LD.info['target_num'] == 1`` only column 1 of the predictions is kept.

    Args:
        LD: data holder with ``LD.data`` ('X_train', 'Y_train', 'X_valid',
            'X_test') and ``LD.info`` ('name', 'target_num'). The training
            entries of ``LD.data`` are replaced by a shuffled copy.
        output_dir: directory receiving the prediction files.
        basename: prefix of the prediction file names.

    Raises:
        ValueError: if no recipe exists for ``LD.info['name']`` (previously
            this surfaced as an unbound ``preds_valid`` variable).
    """
    import os

    import numpy as np
    import xgboost as xgb
    # linear_model and naive_bayes are required by the 'grigoris' and
    # 'wallis' recipes; they were used without being imported here.
    from sklearn import ensemble, linear_model, naive_bayes
    from sklearn.utils import shuffle

    import data_converter
    import data_io

    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'],
                                                     LD.data['Y_train'],
                                                     random_state=1)

    Y_train = LD.data['Y_train']
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']

    dataset = LD.info['name']

    if dataset == 'alexis':
        forests = [
            ensemble.RandomForestClassifier(max_depth=140,
                                            n_estimators=1800,
                                            n_jobs=-1,
                                            random_state=seed,
                                            verbose=0,
                                            warm_start=True)
            for seed in (0, 1)
        ]
        for forest in forests:
            forest.fit(X_train, Y_train)

        # predict_proba results are stacked into one array per forest and
        # indexed as [output, sample, class] below.
        valid_stack = [np.array(f.predict_proba(X_valid)) for f in forests]
        test_stack = [np.array(f.predict_proba(X_test)) for f in forests]

        preds_valid = (valid_stack[0] + valid_stack[1]) / 2
        preds_test = (test_stack[0] + test_stack[1]) / 2

        # Keep class index 1 and put samples on the first axis.
        preds_valid = preds_valid[:, :, 1].T
        preds_test = preds_test[:, :, 1].T

    elif dataset == 'dionis':
        Lest = 600  # 600 will consume cca 250 GB of RAM, use 50 for similar result
        # Lest = 50

        preds_valid = None
        preds_test = None
        for seed in range(5):
            model = ensemble.RandomForestClassifier(n_jobs=-1,
                                                    n_estimators=Lest,
                                                    random_state=seed)
            model.fit(X_train, Y_train)
            fold_valid = model.predict_proba(X_valid)
            fold_test = model.predict_proba(X_test)
            if preds_valid is None:
                preds_valid, preds_test = fold_valid, fold_test
            else:
                preds_valid = preds_valid + fold_valid
                preds_test = preds_test + fold_test
        # NOTE(review): the original author flagged the missing "/ 5" here as
        # a bug but kept it. The sum is left undivided on purpose so the
        # written predictions stay identical; it is clipped to [0, 1] below.

    elif dataset == 'grigoris':
        def shallow_forest(seed):
            # The three forests differ only in their random seed.
            return ensemble.RandomForestClassifier(criterion='entropy',
                                                   max_features=0.05,
                                                   max_depth=5,
                                                   n_estimators=120,
                                                   n_jobs=-1,
                                                   random_state=seed,
                                                   verbose=0)

        committee = [
            shallow_forest(0),
            linear_model.LogisticRegression(penalty='l1',
                                            random_state=1,
                                            n_jobs=-1,
                                            C=0.008),
            shallow_forest(1),
            shallow_forest(2),
        ]

        preds_valid = np.zeros((X_valid.shape[0], Y_train.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train.shape[1]))
        for pyt in range(Y_train.shape[1]):
            # Progress output; single-argument print works on Python 2 and 3.
            print(pyt)
            ytp = Y_train[:, pyt]
            for member in committee:
                member.fit(X_train, ytp)

            preds_valid[:, pyt] = sum(
                member.predict_proba(X_valid)[:, 1]
                for member in committee) / len(committee)
            preds_test[:, pyt] = sum(
                member.predict_proba(X_test)[:, 1]
                for member in committee) / len(committee)

    elif dataset == 'jannis':
        # Oversample class 0: append its rows 18 more times.
        is_zero = Y_train == 0
        Xd, yd = X_train[is_zero], Y_train[is_zero]
        X_train = np.vstack([X_train] + [Xd] * 18)
        Y_train = np.hstack([Y_train] + [yd] * 18)

        # Oversample class 2: append its rows once more.
        is_two = Y_train == 2
        Xd, yd = X_train[is_two], Y_train[is_two]
        X_train = np.vstack([X_train, Xd])
        Y_train = np.hstack([Y_train, yd])

        Y_train_raw = np.array(
            data_converter.convert_to_bin(Y_train, len(np.unique(Y_train)),
                                          False))

        preds_valid = np.zeros((X_valid.shape[0], Y_train_raw.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train_raw.shape[1]))
        for pyt in range(Y_train_raw.shape[1]):
            # The first column gets a lower base score than the others.
            Lbs = 0.2 if pyt == 0 else 0.5

            model = xgb.XGBClassifier(max_depth=30,
                                      learning_rate=0.05,
                                      n_estimators=100,
                                      silent=True,
                                      objective='binary:logistic',
                                      nthread=-1,
                                      gamma=0,
                                      min_child_weight=80,
                                      max_delta_step=1,
                                      subsample=1,
                                      colsample_bytree=1,
                                      base_score=Lbs,
                                      seed=0,
                                      missing=None)

            model.fit(X_train, Y_train_raw[:, pyt])
            preds_valid[:, pyt] = model.predict_proba(X_valid)[:, 1]
            preds_test[:, pyt] = model.predict_proba(X_test)[:, 1]

    elif dataset == 'wallis':
        model = naive_bayes.MultinomialNB(alpha=0.02)

        model2 = xgb.XGBClassifier(max_depth=5,
                                   learning_rate=0.05,
                                   n_estimators=1200,
                                   silent=True,
                                   objective='multi:softprob',
                                   nthread=-1,
                                   gamma=0,
                                   min_child_weight=1,
                                   max_delta_step=0,
                                   subsample=1,
                                   colsample_bytree=1,
                                   base_score=0.5,
                                   seed=0,
                                   missing=None)

        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)

        model2.fit(X_train, Y_train)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)

        preds_valid = (preds_valid1 + preds_valid2) / 2
        preds_test = (preds_test1 + preds_test2) / 2

    else:
        raise ValueError(
            "predict: no model configured for dataset %r" % (dataset,))

    if LD.info['target_num'] == 1:
        # Single target: keep only column 1 of the predictions.
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)

    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'),
                  preds_valid)
    data_io.write(os.path.join(output_dir, basename + '_test_000.predict'),
                  preds_test)
Пример #16
0
# Restore the trained model and, for the first 12 indices in I, compute the
# relevance map and the prediction of the selected MNIST test image, save the
# rendered heatmap and show it. Note that './heatmap.png' and './heatmap.npy'
# are overwritten on every iteration.
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, model_path)
    for inx in I[:12]:
        # Shift and scale the pixel values with (v - 0.5) * 2.
        test_x = (mnist.test.images[inx] - 0.5) * 2
        test_y = mnist.test.labels[inx]

        # Fetch relevance and prediction in a single run so the network is
        # evaluated once per sample instead of twice with the same feed.
        relevance, pred_y = sess.run([R, pred],
                                     feed_dict={
                                         x: test_x[np.newaxis, :]
                                         })

        digit = render.digit_to_rgb(test_x, scaling = 3)
        hm = render.hm_to_rgb(relevance, X = test_x, scaling = 3, sigma = 2)
        digit_hm = render.save_image([digit,hm],'./heatmap.png')
        data_io.write(relevance,'./heatmap.npy')

        print ('True Class:     {}'.format(np.argmax(test_y)))
        print ('Predicted Class: {}\n'.format(np.argmax(pred_y)))

        #display the image as written to file
        plt.imshow(digit_hm, interpolation = 'none', cmap=plt.cm.binary)
        plt.axis('off')
        plt.show()
Пример #17
0
def ingestion_fn(dataset_dir,
                 code_dir,
                 time_budget,
                 time_budget_approx,
                 output_dir,
                 score_dir,
                 model_config_name=None,
                 model_config=None):
    """Run the train/predict ingestion loop on the single dataset found.

    The function locates exactly one ``*.data`` entry in ``dataset_dir``,
    builds the participant's ``Model`` (imported from ``code_dir``), then
    alternates ``M.train`` / ``M.test`` and writes each prediction to
    ``output_dir`` as ``<name>.predict_<k>``. Finally it writes ``end.txt``
    and copies everything in ``output_dir`` to ``score_dir``.

    Args:
        dataset_dir: Directory containing the dataset. A trailing
            ``run/input`` is rewritten to ``run/input_data`` when
            ``code_dir`` also ends with ``run/program``.
        code_dir: Directory appended to the import path (``path`` --
            presumably ``sys.path``, imported elsewhere in the file) so
            that ``model`` can be imported.
        time_budget: Budget in seconds; the remaining part is passed to
            ``M.train`` and ``M.test`` and recorded by ``write_start_file``.
        time_budget_approx: Budget in seconds used to decide whether a
            prediction is still written; once exceeded the loop stops.
        output_dir: Directory receiving predictions and ``end.txt``.
        score_dir: Directory the content of ``output_dir`` is copied to.
        model_config_name: Forwarded to ``Model(...)``.
        model_config: Forwarded to ``Model(...)``.

    Raises:
        ValueError: If ``dataset_dir`` does not hold exactly one dataset.

    Exceptions raised inside the train/predict loop are logged and recorded
    as ``ingestion_success: 0`` in ``end.txt`` instead of propagating.
    """
    #### Check whether everything went well
    ingestion_success = True

    # Parse directories
    root_dir = _HERE(os.pardir)
    ingestion_program_dir = join(root_dir, "ingestion_program")

    if dataset_dir.endswith("run/input") and code_dir.endswith("run/program"):
        logger.debug(
            "Since dataset_dir ends with 'run/input' and code_dir "
            "ends with 'run/program', suppose running on " +
            "CodaLab platform. Modify dataset_dir to 'run/input_data' "
            "and code_dir to 'run/submission'. " +
            "Directory parsing should be more flexible in the code of " +
            "compute worker: we need explicit directories for " +
            "dataset_dir and code_dir.")
        dataset_dir = dataset_dir.replace("run/input", "run/input_data")
        code_dir = code_dir.replace("run/program", "run/submission")

    # Show directories for debugging
    logger.debug("sys.argv = " + str(sys.argv))
    logger.debug("Using dataset_dir: " + dataset_dir)
    logger.debug("Using output_dir: " + output_dir)
    logger.debug("Using ingestion_program_dir: " + ingestion_program_dir)
    logger.debug("Using code_dir: " + code_dir)

    # Our libraries
    path.append(ingestion_program_dir)
    path.append(code_dir)
    # IG: to allow submitting the starting kit as sample submission
    path.append(code_dir + "/sample_code_submission")
    import data_io
    from dataset import AutoDLDataset  # THE class of AutoDL datasets

    data_io.mkdir(output_dir)

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(dataset_dir)
    #### Delete zip files and metadata file
    datanames = [x for x in datanames if x.endswith(".data")]

    if len(datanames) != 1:
        raise ValueError("{} datasets found in dataset_dir={}!\n".format(
            len(datanames), dataset_dir) +
                         "Please put only ONE dataset under dataset_dir.")

    basename = datanames[0]

    logger.info("************************************************")
    logger.info("******** Processing dataset " + basename[:-5].capitalize() +
                " ********")
    logger.info("************************************************")
    logger.debug("Version: {}. Description: {}".format(VERSION, DESCRIPTION))

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D_train = AutoDLDataset(os.path.join(dataset_dir, basename, "train"))
    D_test = AutoDLDataset(os.path.join(dataset_dir, basename, "test"))
    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D_test.get_metadata().size()
    output_dim = D_test.get_metadata().get_output_size()
    correct_prediction_shape = (num_examples_test, output_dim)

    ##### Begin creating model #####
    # NOTE: the 20-minute limit announced below is not enforced here; model
    # creation runs without a timer.
    logger.info("Creating model...this process should not exceed 20min.")
    from model import Model  # in participants' model.py

    # The metadata of D_train and D_test only differ in sample_count
    M = Model(D_train.get_metadata(),
              model_config_name=model_config_name,
              model_config=model_config)
    ###### End creating model ######

    # Mark starting time of ingestion
    start = time.time()
    logger.info("=" * 5 + " Start core part of ingestion program. " +
                "Version: {} ".format(VERSION) + "=" * 5)

    write_start_file(output_dir,
                     start_time=start,
                     time_budget=time_budget,
                     task_name=basename.split(".")[0])

    try:
        # Check if the model has methods `train` and `test`.
        for attr in ["train", "test"]:
            if not hasattr(M, attr):
                # The placeholder must be filled in, otherwise the message
                # shows a literal `{}` instead of the missing method name.
                raise ModelApiError(
                    ("Your model object doesn't have the method "
                     "`{}`. Please implement it in model.py.").format(attr))

        # Check if model.py uses new done_training API instead of marking
        # stopping by returning None
        use_done_training_api = hasattr(M, "done_training")
        if not use_done_training_api:
            logger.warning(
                "Your model object doesn't have an attribute " +
                "`done_training`. But this is necessary for ingestion " +
                "program to know whether the model has done training " +
                "and to decide whether to proceed more training. " +
                "Please add this attribute to your model.")

        # Keeping track of how many predictions are made
        prediction_order_number = 0

        # Start the CORE PART: train/predict process
        while not (use_done_training_api and M.done_training):
            remaining_time_budget = start + time_budget - time.time()
            # Train the model
            logger.info("Begin training the model...")
            M.train(D_train.get_dataset(),
                    remaining_time_budget=remaining_time_budget)
            logger.info("Finished training the model.")
            # Make predictions using the trained model
            logger.info("Begin testing the model by making predictions " +
                        "on test set...")
            remaining_time_budget = start + time_budget - time.time()
            Y_pred = M.test(D_test.get_dataset(),
                            remaining_time_budget=remaining_time_budget)
            logger.info("Finished making predictions.")
            if Y_pred is None:  # Stop train/predict process if Y_pred is None
                logger.info("The method model.test returned `None`. " +
                            "Stop train/predict process.")
                break
            else:  # Check if the prediction has good shape
                prediction_shape = tuple(Y_pred.shape)
                if prediction_shape != correct_prediction_shape:
                    raise BadPredictionShapeError(
                        "Bad prediction shape! Expected {} but got {}.".format(
                            correct_prediction_shape, prediction_shape))
            # A prediction that arrives after the approximate budget is
            # dropped: the loop stops before anything is written for it.
            remaining_time_budget = start + time_budget_approx - time.time()
            if remaining_time_budget < 0:
                break
            # Write timestamp to 'start.txt'
            write_timestamp(output_dir,
                            predict_idx=prediction_order_number,
                            timestamp=time.time())
            # Prediction files: adult.predict_0, adult.predict_1, ...
            filename_test = basename[:-5] + ".predict_" + str(
                prediction_order_number)
            # Write predictions to output_dir
            data_io.write(os.path.join(output_dir, filename_test), Y_pred)
            prediction_order_number += 1
            logger.info(
                "[+] {0:d} predictions made, time spent so far {1:.2f} sec".
                format(prediction_order_number,
                       time.time() - start))
            remaining_time_budget = start + time_budget_approx - time.time()
            logger.info(
                "[+] Time left {0:.2f} sec".format(remaining_time_budget))

    except Exception as e:
        # Any failure in the loop is recorded in end.txt rather than raised.
        ingestion_success = False
        logger.info("Failed to run ingestion.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)

    # Finishing ingestion program
    end_time = time.time()
    overall_time_spent = end_time - start

    # Write overall_time_spent to a end.txt file
    end_filename = "end.txt"
    with open(os.path.join(output_dir, end_filename), "w") as f:
        f.write("ingestion_duration: " + str(overall_time_spent) + "\n")
        f.write("ingestion_success: " + str(int(ingestion_success)) + "\n")
        f.write("end_time: " + str(end_time) + "\n")
        logger.info("Wrote the file {} marking the end of ingestion.".format(
            end_filename))
        if ingestion_success:
            logger.info("[+] Done. Ingestion program successfully terminated.")
            logger.info("[+] Overall time spent %5.2f sec " %
                        overall_time_spent)
        else:
            logger.info(
                "[-] Done, but encountered some errors during ingestion.")
            logger.info("[-] Overall time spent %5.2f sec " %
                        overall_time_spent)

    # Copy all files in output_dir to score_dir
    # NOTE(review): the command line is built by string formatting and run
    # through the shell (the `*` glob relies on it); paths containing spaces
    # or shell metacharacters will break this -- confirm callers only pass
    # trusted, plain paths.
    os.system("cp -R {} {}".format(os.path.join(output_dir, "*"), score_dir))
    logger.debug("Copied all ingestion output to scoring output directory.")

    logger.info("[Ingestion terminated]")
Пример #18
0
def echo_nest_analysis(fname_song, fname_config=None):
    """
    Get track details via Echo Nest API.

    The full analysis is cached as ``<song stem>.full.yml`` in an
    ``Audio Analysis`` folder next to the config file. When the cache file
    exists it is loaded and returned without contacting the service.
    Otherwise the track is uploaded (first time) or looked up by its stored
    id, the analysis is fetched from ``track.analysis_url`` and written to
    the cache.

    Args:
        fname_song: Path or file name of the song; only its base name is
            used as the key in the config and for the cache file name.
        fname_config: Path of the config file; defaults to
            ``'audio_config.yml'``. The config must provide ``'api_key'``
            and may provide a ``'songs'`` mapping.

    Returns:
        The analysis, either loaded from the cache file or decoded from the
        JSON response.
    """
    if not fname_config:
        fname_config = 'audio_config.yml'

    fname_config = os.path.abspath(fname_config)
    path_work = os.path.dirname(fname_config)

    path_analysis = os.path.join(path_work, 'Audio Analysis')
    if not os.path.isdir(path_analysis):
        os.mkdir(path_analysis)

    fname_song = os.path.basename(fname_song)
    stem = os.path.splitext(fname_song)[0]
    cache_file = os.path.join(path_analysis, stem + '.full.yml')

    if os.path.isfile(cache_file):
        print('Load existing analysis')
        analysis, _ = data_io.read(cache_file)
        return analysis

    # Read config.
    info, _ = data_io.read(fname_config)

    # Test for the key before subscripting: a config without a 'songs'
    # entry must not raise KeyError, and an empty/None entry is replaced too.
    if 'songs' not in info or not info['songs']:
        info['songs'] = {}

    # Configure Echo Nest API key.
    pyechonest.config.ECHO_NEST_API_KEY = info['api_key']

    # Load track details.
    if fname_song not in info['songs']:
        print('Upload new song to Echo Nest: %s' % fname_song)

        info['songs'][fname_song] = {}

        # NOTE(review): only the base name is passed here, so the upload
        # resolves the file relative to the current working directory --
        # confirm this is intended.
        track = pyechonest.track.track_from_filename(fname_song)

        info['songs'][fname_song]['id'] = track.id
        info['songs'][fname_song]['analysis_url'] = track.analysis_url

        # Save updated config.
        data_io.write(fname_config, info)

    else:
        print('Download song analysis from Echo Nest: %s' % fname_song)
        track = pyechonest.track.track_from_id(info['songs'][fname_song]['id'])

    print('Retrieve full analysis from url')
    r = requests.get(track.analysis_url)
    analysis = r.json()

    print('Save analysis to cache folder')
    data_io.write(cache_file, analysis)

    # Done.
    return analysis
Пример #19
0
    def _epsilon_lrp(self, R, epsilon):
        '''
        Epsilon-stabilised LRP for the sum-pooling layer, according to
        Eq(58) in DOI: 10.1371/journal.pone.0130140

        Relevance R is redistributed onto the input self.X, window by window.
        Besides returning the input relevance Rx, the method dumps debugging
        arrays below '../r_array/': a growing history of Rx after every
        window, the final Rx, and the last window's weights and inputs.
        '''
        _, H, W, _ = self.X.shape

        pool_h, pool_w = self.pool
        step_h, step_w = self.stride

        #assume the given pooling and stride parameters are carefully chosen.
        n_rows = int((H - pool_h) / step_h + 1)
        n_cols = int((W - pool_w) / step_w + 1)

        Rx = np.zeros(self.X.shape)
        #factor in normalizer applied to Y in the forward pass
        normalizer = 1. / np.sqrt(pool_h * pool_w)
        stabilizer = epsilon * ((self.Y >= 0) * 2 - 1.)
        R_norm = R / (self.Y / normalizer + stabilizer)

        # The size of axis 2 of Rx never changes, so the debug dump layout
        # and its target file are chosen once, up front.
        axis2 = Rx.shape[2]
        if axis2 == 2:
            dump_shape, dump_file = [10, 40], '../r_array/sumpol_3.npy'
        elif axis2 == 10:
            dump_shape, dump_file = [10, 250], '../r_array/sumpol_2.npy'
        else:
            dump_shape, dump_file = [28, 280], '../r_array/sumpol_1.npy'

        # Starts out as a reshaped look at Rx itself; each concatenate below
        # replaces it with a new, longer array.
        history = np.reshape(Rx, dump_shape)

        for i in range(n_rows):
            top = i * step_h
            for j in range(n_cols):
                left = j * step_w
                #input activations.
                Z = self.X[:, top:top + pool_h, left:left + pool_w, :]
                sp_check = Z * (R_norm[:, i:i + 1, j:j + 1, :])
                Rx[:, top:top + pool_h, left:left + pool_w, :] += sp_check

                # Append the current state of Rx and rewrite the dump file.
                snapshot = np.reshape(Rx, dump_shape)
                history = np.concatenate((history, snapshot), axis=1)
                data_io.write(history, dump_file)

        data_io.write(Rx, '../r_array/sumpoll.npy')

        # Dump the weights and inputs of the last processed window, then the
        # final relevance; file names carry the first three dimensions.
        weight_file = '../r_array/x_lrp_sumpool_weight({},{},{}).npy'.format(
            sp_check.shape[0], sp_check.shape[1], sp_check.shape[2])
        data_io.write(sp_check, weight_file)

        input_file = '../r_array/x_lrp_sumpool_input({},{},{}).npy'.format(
            Z.shape[0], Z.shape[1], Z.shape[2])
        data_io.write(Z, input_file)

        result_file = '../r_array/sumpool_lrp({},{},{}).npy'.format(
            Rx.shape[0], Rx.shape[1], Rx.shape[2])
        data_io.write(Rx, result_file)
        return Rx
Пример #20
0
                                                Y_train,
                                                test_size=0.2,
                                                random_state=42)

# Train on augmented batches of 64 from (XTrain, YTrain), validating on
# (XTest, YTest). `es` and `mc` are callbacks defined outside this snippet
# (presumably early stopping and model checkpointing -- confirm).
model.fit_generator(aug.flow(XTrain, YTrain, batch_size=64),
                    shuffle=True,
                    epochs=1000,
                    steps_per_epoch=len(XTrain) // 64,
                    validation_data=(XTest, YTest),
                    callbacks=[es, mc])

# Predictions come from the model stored on disk, not from `model` itself.
saved_model = load_model('saved_model/best_model.h5')

# Note that X_train (not the XTrain split used for fitting) is predicted.
Y_hat_train = saved_model.predict(X_train)
Y_hat_valid = saved_model.predict(X_valid)
Y_hat_test = saved_model.predict(X_test)

# Write one '.predict' file per subset; the path prefix is built by plain
# string concatenation of results_dir and data_name.
results_name = results_dir + data_name
write(results_name + '_train.predict', Y_hat_train)
write(results_name + '_valid.predict', Y_hat_valid)
write(results_name + '_test.predict', Y_hat_test)

metric_name, scoring_function = 'auc_binary', roc_auc_score

# Only the training score is reported; the valid/test lines are disabled.
print('Training score for the', metric_name,
      'metric = %5.4f' % scoring_function(Y_train, Y_hat_train))
# print('Valid score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_valid, Y_hat_valid))
# print('Test score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_test, Y_hat_test))
# "Ideal" score: the labels scored against themselves.
print('Ideal score for the', metric_name,
      'metric = %5.4f' % scoring_function(Y_train, Y_train))
Пример #21
0
    tiles_grid, tiles_rack = tiles.carve_tiles(img, info)

    print(fname_img)
    
    # Save specified tiles to files.
    for label, ij in info_img.items():
        print(ij)
        for mn, tile in tiles_grid:
            # Got a match?
            if ij == mn:
                print(label)

                fname = 'tile_grid_%s.png' % (label)
                f = os.path.join(path_data, 'tiles', fname)
                io.write(f, tile)

                

# For every reference image listed under info['reference']['rack'], load the
# image, carve it into tiles, and report the labels whose position matches a
# rack tile.
for fname_img, info_img in info['reference']['rack'].items():
    f = os.path.join(path_data, 'reference', fname_img)
    img, meta = io.read(f)

    tiles_grid, tiles_rack = tiles.carve_tiles(img, info)

    # Save specified tiles to files.
    # NOTE(review): in the lines visible here a match is only printed; no
    # file is written -- the snippet may be cut off at this point.
    for label, ij in info_img.items():
        for mn, tile in tiles_rack:
            # Got a match?
            if ij == mn:
                print(label)
Пример #22
0
def _write_cycle_predictions(preds_valid, preds_test, single_target,
                             output_dir, basename, cycle):
    """Clip one cycle's predictions to [0, 1] and write both files.

    When ``single_target`` is true only column 1 of each prediction array is
    kept. The arguments are never modified, so the caller's arrays keep
    their shape and can be written again in a later step.
    """
    if single_target:
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)

    suffix = str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, basename + '_valid_' + suffix),
                  preds_valid)
    data_io.write(os.path.join(output_dir, basename + '_test_' + suffix),
                  preds_test)


def blender(sd, srd, Nworkers, stop_writing, output_dir, basename, Lstart,
            Ltime_budget, Lfold):
    """Continuously blend the best finished workers and write predictions.

    Each pass collects the finished workers from ``sd`` (top two per
    ``blend_group``) and the finished raw models from ``srd`` (top three),
    scores the best single one, tries all 2- and 3-way blends of the leading
    candidates via ``blend2`` / ``blend3``, and writes a new
    ``<basename>_valid_NNN.predict`` / ``<basename>_test_NNN.predict`` pair
    whenever the best score improves.

    The loop has no exit condition; it runs until the process is stopped
    from outside. Errors inside a pass are printed and the pass is retried.

    Args:
        sd: Shared object exposing ``worker0`` .. ``worker<Nworkers-1>``
            (mappings with 'done', 'blend_group', 'score', 'preds_valid',
            'preds_test', 'preds_2fld'), ``yt_raw`` and ``LD``.
        srd: Shared object exposing ``raw_model``, ``raw_model1`` ..
            ``raw_model4`` with the same keys.
        Nworkers: Number of worker slots to inspect on ``sd``.
        stop_writing: Event-like object; once ``is_set()`` is true no more
            blended predictions are written.
        output_dir: Directory the prediction files are written to.
        basename: Prefix of the prediction file names.
        Lstart: Start timestamp, compared with ``time.time()``.
        Ltime_budget: Time budget; together with ``Lstart`` it throttles the
            cycle counter to about 100 over the whole budget.
        Lfold: With ``Lfold > 1`` all of ``sd.yt_raw`` is used as scoring
            target, otherwise only the part from ``split`` on; it also
            selects how many top candidates are blended.
    """
    try:
        split = int(len(sd.LD.data['Y_train']) * 0.5)
        cycle = 1  # cycle 0 is all zeros
        best_score = 0
        atbest = 0

        while True:
            try:
                time.sleep(0.5)
                # limit to 100 predictions
                if cycle > (time.time() - Lstart) / Ltime_budget * 100:
                    time.sleep(1)
                    continue

                # Plain attribute lookup instead of exec: exec cannot bind
                # function locals on Python 3.
                finished = []
                for wr_no in range(Nworkers):
                    wr_data = getattr(sd, 'worker' + str(wr_no))
                    if wr_data['done'] > 0:
                        finished.append(wr_data)

                workers_data = []
                wgroups = [i['blend_group'] for i in finished]
                for group in np.unique(wgroups):
                    twdata = [
                        i for i in finished if i['blend_group'] == group
                    ]
                    twdata = sorted(twdata,
                                    key=itemgetter('score'),
                                    reverse=True)
                    # Keep the best two of each group (or one if alone).
                    workers_data.extend(twdata[:2])
                    print('%s %s %s' %
                          (group, len(twdata), len(workers_data)))

                # this is patch for codalab VM
                workers_data_raw = []
                for raw_name in ('raw_model', 'raw_model1', 'raw_model2',
                                 'raw_model3', 'raw_model4'):
                    raw_data = getattr(srd, raw_name)
                    if raw_data['done'] == 1:
                        workers_data_raw.append(raw_data)

                # Keep at most the three best raw models.
                workers_data_raw = sorted(workers_data_raw,
                                          key=itemgetter('score'),
                                          reverse=True)
                workers_data.extend(workers_data_raw[:3])

                workers_data = sorted(workers_data,
                                      key=itemgetter('score'),
                                      reverse=True)

                if len(workers_data) > 0:
                    single_target = sd.LD.info['target_num'] == 1

                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']

                    y = sd.yt_raw[split:]
                    if Lfold > 1:
                        y = sd.yt_raw

                    x = worker0['preds_2fld']

                    s0 = getattr(libscores, sd.LD.info['metric'])(y, x)
                    best_score = s0

                    #short run can't wait for blend (usable only for AutoML 1)
                    try:
                        if s0 > atbest and cycle < 2:
                            atbest = best_score * 0.9  #not reilable score
                            # The helper works on its own names, so
                            # preds_valid / preds_test stay 2-D for the
                            # second write further down.
                            _write_cycle_predictions(preds_valid, preds_test,
                                                     single_target,
                                                     output_dir, basename,
                                                     cycle)
                            cycle += 1
                    except Exception as e:
                        # Best effort: a failed early write must not stop
                        # the blending below, but it should be visible.
                        print('exception in blender short run' + '     ' +
                              str(e))

                    if Lfold < 4:
                        Lsample = 4
                    else:
                        Lsample = 6
                    Lssample = Lsample - 1

                    for worker0, worker1 in itertools.combinations(
                            workers_data[:Lsample], 2):
                        s01, validt, testt = blend2(
                            worker0['preds_2fld'], worker1['preds_2fld'], y,
                            sd.LD.info['metric'], worker0['preds_valid'],
                            worker1['preds_valid'], worker0['preds_test'],
                            worker1['preds_test'])

                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt

                    for worker0, worker1, worker2 in itertools.combinations(
                            workers_data[:Lssample], 3):
                        s012, validt, testt = blend3(
                            worker0['preds_2fld'], worker1['preds_2fld'],
                            worker2['preds_2fld'], y, sd.LD.info['metric'],
                            worker0['preds_valid'], worker1['preds_valid'],
                            worker2['preds_valid'], worker0['preds_test'],
                            worker1['preds_test'], worker2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt

                    #until last 10 seconds (event signal)
                    if not stop_writing.is_set():
                        if best_score > atbest:
                            atbest = best_score
                            print('naj = %s %s %s' %
                                  (workers_data[0]['score'], best_score,
                                   atbest))

                            _write_cycle_predictions(preds_valid, preds_test,
                                                     single_target,
                                                     output_dir, basename,
                                                     cycle)
                            cycle += 1
                    else:
                        print('stop writing is set')

            except Exception as e:
                print('exception in blender process' + '     ' + str(e))
                # in case of any problem, let's try again
    except Exception as e:
        print('exception in blender main process' + '     ' + str(e))
Пример #23
0
          (time.time() - start))

    # Make predictions
    # -----------------
    Y_train = M_clf.predict(D.data['X_train'])
    # Y_valid = M_clf.predict(D.data['X_valid'])
    Y_test = M_clf.predict(D.data['X_test'])
    print("[+] Prediction success, time spent so far %5.2f sec" %
          (time.time() - start))
    # Write results
    # -------------
    filename_train = basename + '_train.predict'
    # filename_valid = basename + '_valid.predict'
    filename_test = basename + '_test.predict'
    print(verbose, "======== Saving results to: " + output_dir)
    data_io.write(os.path.join(output_dir, filename_train), Y_train)
    # data_io.write(os.path.join(output_dir,filename_valid), Y_valid)
    data_io.write(os.path.join(output_dir, filename_test), Y_test)

    print("[+] Results saved, time spent so far %5.2f sec" %
          (time.time() - start))
    time_spent = time.time() - start
    time_left_over = time_budget - time_spent
    print("[+] End cycle, time left %5.2f sec" % time_left_over)
    if time_left_over <= 0: exit()

    time_spent = time.time() - start
    time_left_over = time_budget - time_spent

    overall_time_spent = time.time() - overall_start
    if execution_success:
Пример #24
0
    input_dir = argv[1]
    datanames = data_io.inventory_data(input_dir)

    # The output directory will contain the scores, create it if it does not exist
    output_dir = argv[2]
    data_io.mkdir(output_dir)

    if len(datanames) == 0:
        print("****** No data found ******")

    # Loop over datasets
    for basename in datanames:
        print("****** Processing " + basename.capitalize() + " ******")
        # Fake predictions on validation and test data
        X = data_io.data(
            path.join(input_dir, basename, basename + '_valid.data'))
        Yvalid = random.rand(X.shape[0])
        X = data_io.data(
            path.join(input_dir, basename, basename + '_test.data'))
        Ytest = random.rand(X.shape[0])
        # Write results to files
        data_io.write(path.join(output_dir, basename + '_valid.predict'),
                      Yvalid)
        data_io.write(path.join(output_dir, basename + '_test.predict'), Ytest)

# Lots of debug code...
    data_io.show_io(input_dir, output_dir)
    data_io.show_version()

    exit(0)
Пример #25
0
def predict(LD, output_dir, basename):
    """Train a dataset-specific ensemble and write validation/test predictions.

    The recipe is selected by ``LD.info['name']`` (``alexis``, ``dionis``,
    ``grigoris``, ``jannis`` or ``wallis``).  The resulting predictions are
    clipped to [0, 1] and written with ``data_io.write`` to
    ``<basename>_valid_000.predict`` and ``<basename>_test_000.predict``
    inside ``output_dir``.

    Parameters
    ----------
    LD : object
        Data holder exposing ``LD.data`` (keys ``X_train``, ``Y_train``,
        ``X_valid``, ``X_test``) and ``LD.info`` (keys ``name`` and
        ``target_num``).  As a side effect ``LD.data['X_train']`` and
        ``LD.data['Y_train']`` are replaced by their shuffled versions.
    output_dir : str
        Directory the two prediction files are written to.
    basename : str
        Prefix of the prediction file names.

    Raises
    ------
    ValueError
        If ``LD.info['name']`` is not one of the supported dataset names.
    """
    import os
    import numpy as np
    import xgboost as xgb
    import data_converter
    import data_io
    # linear_model / naive_bayes are needed by the 'grigoris' / 'wallis' recipes.
    from sklearn import ensemble, linear_model, naive_bayes
    from sklearn.utils import shuffle

    LD.data['X_train'], LD.data['Y_train'] = shuffle(
        LD.data['X_train'], LD.data['Y_train'], random_state=1)

    X_train = LD.data['X_train']
    Y_train = LD.data['Y_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']

    name = LD.info['name']

    if name == 'alexis':
        # Two identically configured forests that differ only in their seed.
        valid_stack = []
        test_stack = []
        for seed in (0, 1):
            model = ensemble.RandomForestClassifier(
                max_depth=140, n_estimators=1800, n_jobs=-1,
                random_state=seed, verbose=0, warm_start=True)
            model.fit(X_train, Y_train)
            valid_stack.append(np.array(model.predict_proba(X_valid)))
            test_stack.append(np.array(model.predict_proba(X_test)))

        preds_valid = (valid_stack[0] + valid_stack[1]) / 2
        preds_test = (test_stack[0] + test_stack[1]) / 2

        # The stacked output is 3-D; keep index 1 of the last axis and
        # transpose.  Presumably this turns (labels, samples, 2) into
        # (samples, labels) positive-class probabilities -- TODO confirm.
        preds_valid = preds_valid[:, :, 1].T
        preds_test = preds_test[:, :, 1].T

    elif name == 'dionis':
        Lest = 600  # 600 will consume cca 250 GB of RAM, use 50 for similar result
        seeds = (0, 1, 2, 3, 4)

        preds_valid = None
        preds_test = None
        for seed in seeds:
            model = ensemble.RandomForestClassifier(
                n_jobs=-1, n_estimators=Lest, random_state=seed)
            model.fit(X_train, Y_train)
            fold_valid = model.predict_proba(X_valid)
            fold_test = model.predict_proba(X_test)
            if preds_valid is None:
                preds_valid = fold_valid
                preds_test = fold_test
            else:
                preds_valid = preds_valid + fold_valid
                preds_test = preds_test + fold_test

        # Average (rather than sum) the forests so that the values remain
        # valid probabilities instead of being saturated by the final clip.
        preds_valid = preds_valid / float(len(seeds))
        preds_test = preds_test / float(len(seeds))

    elif name == 'grigoris':
        models = [
            ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05, max_depth=5, n_estimators=120, n_jobs=-1, random_state=0, verbose=0),
            linear_model.LogisticRegression(penalty='l1', random_state=1, n_jobs=-1, C=0.008),
            ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05, max_depth=5, n_estimators=120, n_jobs=-1, random_state=1, verbose=0),
            ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05, max_depth=5, n_estimators=120, n_jobs=-1, random_state=2, verbose=0),
        ]

        preds_valid = np.zeros((X_valid.shape[0], Y_train.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train.shape[1]))
        # One binary problem per label column; the four models are refit for
        # every column and their positive-class probabilities are averaged.
        for pyt in range(Y_train.shape[1]):
            print(pyt)
            ytp = Y_train[:, pyt]
            for fitted in models:
                fitted.fit(X_train, ytp)

            preds_valid[:, pyt] = sum(
                fitted.predict_proba(X_valid)[:, 1] for fitted in models) / 4
            preds_test[:, pyt] = sum(
                fitted.predict_proba(X_test)[:, 1] for fitted in models) / 4

    elif name == 'jannis':
        # Oversample class 0 by appending 18 extra copies of its rows ...
        Xd = X_train[Y_train == 0]
        yd = Y_train[Y_train == 0]
        X_train = np.vstack([X_train] + [Xd] * 18)
        Y_train = np.hstack([Y_train] + [yd] * 18)

        # ... and class 2 by appending one extra copy.
        Xd = X_train[Y_train == 2]
        yd = Y_train[Y_train == 2]
        X_train = np.vstack([X_train, Xd])
        Y_train = np.hstack([Y_train, yd])

        Y_train_raw = np.array(data_converter.convert_to_bin(Y_train, len(np.unique(Y_train)), False))

        preds_valid = np.zeros((X_valid.shape[0], Y_train_raw.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train_raw.shape[1]))
        # One binary booster per column of the converted target matrix.
        for pyt in range(Y_train_raw.shape[1]):
            # The first column uses a lower base score than the others.
            if pyt == 0:
                Lbs = 0.2
            else:
                Lbs = 0.5

            model = xgb.XGBClassifier(max_depth=30, learning_rate=0.05, n_estimators=100, silent=True,
                objective='binary:logistic', nthread=-1, gamma=0,
                min_child_weight=80, max_delta_step=1, subsample=1,
                colsample_bytree=1, base_score=Lbs, seed=0, missing=None)

            ytp = Y_train_raw[:, pyt]
            model.fit(X_train, ytp)

            preds_valid[:, pyt] = model.predict_proba(X_valid)[:, 1]
            preds_test[:, pyt] = model.predict_proba(X_test)[:, 1]

    elif name == 'wallis':
        model = naive_bayes.MultinomialNB(alpha=0.02)

        model2 = xgb.XGBClassifier(max_depth=5, learning_rate=0.05, n_estimators=1200, silent=True,
                objective='multi:softprob', nthread=-1, gamma=0,
                min_child_weight=1, max_delta_step=0, subsample=1,
                colsample_bytree=1, base_score=0.5, seed=0, missing=None)

        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)

        model2.fit(X_train, Y_train)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)

        preds_valid = (preds_valid1 + preds_valid2) / 2
        preds_test = (preds_test1 + preds_test2) / 2

    else:
        # Fail with a clear message instead of a NameError on preds_valid below.
        raise ValueError("No prediction recipe for dataset %r" % (name,))

    # Single-target tasks keep only column 1 of the prediction matrix.
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)

    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'), preds_valid)
    data_io.write(os.path.join(output_dir, basename + '_test_000.predict'), preds_test)
Пример #26
0
    def _epsilon_lrp(self, R, epsilon):
        '''
        LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

        Parameters
        ----------
        R : numpy.ndarray
            relevance of the layer output; unpacked as (N, Hout, Wout, NF).

        epsilon : float
            stabilizer added to self.Y with the sign of self.Y before dividing R by it.

        Returns
        -------
        Rx : numpy.ndarray
            relevance redistributed onto the layer input, shaped like self.X.

        Besides computing Rx, this method writes several debug arrays below
        '../r_array/' through data_io.write.
        '''

        N, Hout, Wout, NF = R.shape
        hf, wf, df, NF = self.W.shape
        hstride, wstride = self.stride

        # builtin float instead of np.float: the alias was removed in NumPy 1.24
        Rx = np.zeros_like(self.X, dtype=float)
        R_norm = R / (self.Y + epsilon * ((self.Y >= 0) * 2 - 1.))

        # Layout and target file of the per-step debug dump, chosen from the
        # size of axis 2 of the input. The reshape targets are hard-coded, so
        # they only fit inputs holding exactly that many elements.
        if Rx.shape[2] == 1:
            dump_shape, dump_file = [10, 10], '../r_array/convolution_4.npy'
        elif Rx.shape[2] == 5:
            dump_shape, dump_file = [25, 25], '../r_array/convolution_3.npy'
        elif Rx.shape[2] == 14:
            dump_shape, dump_file = [196, 10], '../r_array/convolution_2.npy'
        else:
            dump_shape, dump_file = [1024, 1], '../r_array/convolution_1.npy'

        # NOTE(review): this reshapes Rx itself rather than a copy, so the
        # leading block of the dump may track the first update of Rx instead
        # of staying all-zero -- confirm whether that is intended.
        dump = np.reshape(Rx, dump_shape)

        for i in range(Hout):
            for j in range(Wout):
                rows = slice(i * hstride, i * hstride + hf)
                cols = slice(j * wstride, j * wstride + wf)

                if self.lrp_aware:
                    # reuse the messages stored in self.Z
                    Z = self.Z[:, i, j, ...]
                else:
                    Z = self.W[na, ...] * self.X[:, rows, cols, :, na]

                weight_lrp = Z * (R_norm[:, i:i + 1, j:j + 1, na, :])
                Rx[:, rows, cols, :] += (weight_lrp).sum(axis=4)

                # append the current state of Rx to the dump and rewrite the file
                dump = np.concatenate((dump, np.reshape(Rx, dump_shape)),
                                      axis=1)
                data_io.write(dump, dump_file)

        # weight_lrp holds the messages of the last visited output position
        weight_l = '../r_array/x_lrp_conv_weight({},{},{}).npy'.format(
            weight_lrp.shape[0], weight_lrp.shape[1], weight_lrp.shape[2])
        data_io.write(weight_lrp, weight_l)

        x_lrp = '../r_array/x_lrp_conv_input({},{},{}).npy'.format(
            self.X.shape[0], self.X.shape[1], self.X.shape[2])
        data_io.write(self.X, x_lrp)

        lrp_check = '../r_array/conv_lrp({},{},{}).npy'.format(
            Rx.shape[0], Rx.shape[1], Rx.shape[2])
        data_io.write(Rx, lrp_check)

        data_io.write(Rx, '../r_array/convolution.npy')
        return Rx
Пример #27
0
def _main(args):
    """Run the ingestion program: build the participant's model, then train/predict.

    The function reads one dataset from ``args.dataset_dir``, imports ``Model``
    from the participant's code directory, and alternates ``M.train`` and
    ``M.test`` until the model reports ``done_training``, ``M.test`` returns
    ``None``, or the time budget is exceeded.  After every round the argmax
    (axis 1) of the prediction is written to
    ``<output_dir>/<dataset>.predict_<k>``.  Finally an ``end.txt`` summary is
    written and the content of ``output_dir`` is copied to ``score_dir``.

    Parameters
    ----------
    args : object
        Must provide the attributes ``dataset_dir``, ``output_dir``,
        ``ingestion_program_dir``, ``code_dir``, ``score_dir`` and
        ``time_budget``.  ``time_budget`` is overridden by the dataset
        metadata entry ``time_budget`` when that entry exists.

    Raises
    ------
    ValueError
        If ``dataset_dir`` does not contain exactly one ``.data`` dataset.
    ModelApiError
        If the model lacks a ``train`` or ``test`` attribute.
    BadPredictionShapeError
        If a prediction does not have shape (number of test examples,
        number of classes).
    """
    # Mark starting time of ingestion
    start = time.time()
    logger.info("=" * 5 + " Start ingestion program. ")

    #### Check whether everything went well
    ingestion_success = True

    dataset_dir = args.dataset_dir
    output_dir = args.output_dir
    ingestion_program_dir = args.ingestion_program_dir
    code_dir = args.code_dir
    score_dir = args.score_dir
    time_budget = args.time_budget

    if dataset_dir.endswith('run/input') and\
        code_dir.endswith('run/program'):
        logger.debug(
            "Since dataset_dir ends with 'run/input' and code_dir "
            "ends with 'run/program', suppose running on " +
            "CodaLab platform. Modify dataset_dir to 'run/input_data' "
            "and code_dir to 'run/submission'. " +
            "Directory parsing should be more flexible in the code of " +
            "compute worker: we need explicit directories for " +
            "dataset_dir and code_dir.")
        dataset_dir = dataset_dir.replace('run/input', 'run/input_data')
        code_dir = code_dir.replace('run/program', 'run/submission')

    # Show directories for debugging
    logger.debug("sys.argv = " + str(sys.argv))
    logger.debug("Using dataset_dir: " + dataset_dir)
    logger.debug("Using output_dir: " + output_dir)
    logger.debug("Using ingestion_program_dir: " + ingestion_program_dir)
    logger.debug("Using code_dir: " + code_dir)

    # Our libraries: extend the import path so that the modules imported
    # below (data_io, dataset, model) can be found.
    path.append(ingestion_program_dir)
    path.append(code_dir)
    #IG: to allow submitting the starting kit as sample submission
    path.append(code_dir + '/sample_code_submission')
    import data_io
    from dataset import AutoSpeechDataset  # THE class of AutoNLP datasets

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(dataset_dir)
    #### Delete zip files and metadata file
    datanames = [x for x in datanames if x.endswith('.data')]

    if len(datanames) != 1:
        raise ValueError("{} datasets found in dataset_dir={}!\n"\
                        .format(len(datanames), dataset_dir) +
                        "Please put only ONE dataset under dataset_dir.")

    basename = datanames[0]
    D = AutoSpeechDataset(os.path.join(dataset_dir, basename))
    metadata = D.get_metadata()
    # The dataset metadata may override the time budget given on the command line.
    time_budget = metadata.get("time_budget", time_budget)
    logger.info("Time budget: {}".format(time_budget))

    write_start_file(output_dir,
                     start_time=start,
                     time_budget=time_budget,
                     task_name=basename.split('.')[0])

    logger.info("************************************************")
    logger.info("******** Processing dataset " + basename[:-5].capitalize() +
                " ********")
    logger.info("************************************************")

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D.read_dataset()
    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D.get_test_num()
    output_dim = D.get_class_num()
    correct_prediction_shape = (num_examples_test, output_dim)

    try:
        # ========= Creating a model
        timer = Timer()
        timer.set(
            20 * 60
        )  # 20 min for participants to initialize and install other packages
        with timer.time_limit("Importing model"):
            from model import Model  # in participants' model.py

        ##### Begin creating model #####
        logger.info("Creating model...")
        with timer.time_limit('Initialization'):
            M = Model(metadata)
        ###### End creating model ######
    except TimeoutException as e:
        logger.info(
            "[-] Initialization phase exceeded time budget. Move to train/predict phase"
        )
    except Exception as e:
        logger.info("Failed to initializing model.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)
        raise
    finally:
        # NOTE(review): this block also runs when model creation failed or
        # timed out, in which case `M` is unbound and the first use below
        # raises; that error is then handled by the `except Exception`
        # branch of the inner try.
        try:
            # Fresh timer covering the whole train/predict phase.
            timer = Timer()
            timer.set(time_budget)
            # Check if the model has methods `train` and `test`.
            for attr in ['train', 'test']:
                if not hasattr(M, attr):
                    # Fill in the name of the missing method so the message
                    # tells the participant what to implement.
                    raise ModelApiError(
                        ("Your model object doesn't have the method " +
                         "`{}`. Please implement it in model.py.")
                        .format(attr))

            # Check if model.py uses new done_training API instead of marking
            # stopping by returning None
            use_done_training_api = hasattr(M, 'done_training')
            if not use_done_training_api:
                logger.warning(
                    "Your model object doesn't have an attribute " +
                    "`done_training`. But this is necessary for ingestion " +
                    "program to know whether the model has done training " +
                    "and to decide whether to proceed more training. " +
                    "Please add this attribute to your model.")

            # Keeping track of how many predictions are made
            prediction_order_number = 0

            # Start the CORE PART: train/predict process
            while (not (use_done_training_api and M.done_training)):

                # Train the model
                logger.info("Begin training the model...")
                remaining_time_budget = timer.remain
                with timer.time_limit('training'):
                    M.train(D.get_train(), remaining_time_budget=timer.remain)
                logger.info("Finished training the model.")

                # Make predictions using the trained model
                logger.info("Begin testing the model by making predictions " +
                            "on test set...")
                remaining_time_budget = timer.remain
                with timer.time_limit('predicting'):
                    Y_pred = M.test(
                        D.get_test(),
                        remaining_time_budget=remaining_time_budget)
                logger.info("Finished making predictions.")

                if Y_pred is None:  # Stop train/predict process if Y_pred is None
                    logger.info("The method model.test returned `None`. " +
                                "Stop train/predict process.")
                    break
                else:  # Check if the prediction has good shape
                    prediction_shape = tuple(Y_pred.shape)
                    if prediction_shape != correct_prediction_shape:
                        raise BadPredictionShapeError(
                            "Bad prediction shape! Expected {} but got {}."\
                            .format(correct_prediction_shape, prediction_shape)
                        )
                # Write timestamp to 'start.txt'
                write_timestamp(output_dir,
                                predict_idx=prediction_order_number,
                                timestamp=timer.exec)
                # Prediction files: adult.predict_0, adult.predict_1, ...
                filename_test = basename[:-5] + '.predict_' +\
                    str(prediction_order_number)
                # Write predictions to output_dir: only the index of the
                # highest-scoring class per example is stored, not the scores.
                tmp_pred = np.argmax(Y_pred, axis=1)
                data_io.write(os.path.join(output_dir, filename_test),
                              tmp_pred)
                prediction_order_number += 1
                logger.info("[+] {0:d} predictions made, time spent so far {1:.2f} sec"\
                            .format(prediction_order_number, time.time() - start))
                logger.info("[+] Time left {0:.2f} sec".format(timer.remain))
        except TimeoutException as e:
            logger.info(
                "[-] Ingestion program exceeded time budget. Predictions "
                "made so far will be used for evaluation.")
        except Exception as e:
            ingestion_success = False
            logger.info("Failed to run ingestion.")
            logger.error("Encountered exception:\n" + str(e), exc_info=True)
            raise
        finally:
            # Finishing ingestion program
            end_time = time.time()
            overall_time_spent = end_time - start

            # Write overall_time_spent to a end.txt file
            end_filename = 'end.txt'
            with open(os.path.join(output_dir, end_filename), 'w') as f:
                f.write('ingestion_duration: ' + str(overall_time_spent) +
                        '\n')
                f.write('ingestion_success: ' + str(int(ingestion_success)) +
                        '\n')
                f.write('end_time: ' + str(end_time) + '\n')
                logger.info("Wrote the file {} marking the end of ingestion."\
                            .format(end_filename))
                if ingestion_success:
                    logger.info(
                        "[+] Done. Ingestion program successfully terminated.")
                    logger.info("[+] Overall time spent %5.2f sec " %
                                overall_time_spent)
                else:
                    logger.info(
                        "[-] Done, but encountered some errors during ingestion."
                    )
                    logger.info("[-] Overall time spent %5.2f sec " %
                                overall_time_spent)

            # Copy all files in output_dir to score_dir
            os.system("cp -R {} {}".format(os.path.join(output_dir, '*'),
                                           score_dir))
            logger.debug(
                "Copied all ingestion output to scoring output directory.")

            logger.info("[Ingestion terminated]")
Пример #28
0
    def forward(self, X, lrp_aware=False):
        '''
        Forward pass of an input through the convolution layer.

        Parameters
        ----------
        X : numpy.ndarray
            a network input, shaped (N,H,W,D), with
            N = batch size
            H, W, D = input size in height, width, depth

        lrp_aware : bool
            when True, the per-position forward messages are additionally stored in self.Z
            so that following LRP calls for the same X can reuse them. this costs time and
            memory in the forward pass but pays off when several LRP calls follow, e.g.
            with different parameter settings or for multiple target classes.

        Returns
        -------
        Y : numpy.ndarray
            the layer outputs (also kept in self.Y).

        The input and the output are additionally written to '../r_array/' via data_io.write.
        '''

        self.lrp_aware = lrp_aware
        self.X = X
        batch, height, width, depth = X.shape

        filt_h, filt_w, filt_d, filt_n = self.W.shape
        step_h, step_w = self.stride
        out_channels = self.n

        #assume the given filter size and stride parameters are carefully chosen.
        out_h = (height - filt_h) // step_h + 1
        out_w = (width - filt_w) // step_w + 1

        #initialize the output, and for the lrp aware case the message container
        self.Y = np.zeros((batch, out_h, out_w, out_channels))
        if self.lrp_aware:
            self.Z = np.zeros(
                (batch, out_h, out_w, filt_h, filt_w, filt_d, filt_n))

        for i in range(out_h):
            top = i * step_h
            for j in range(out_w):
                left = j * step_w
                if self.lrp_aware:
                    # store the messages (N, hf, wf, df, nf) first, then sum
                    # the stored values over the filter axes
                    self.Z[:, i, j, ...] = self.W[na, ...] * self.X[
                        :, top:top + filt_h, left:left + filt_w, :, na]
                    summed = self.Z[:, i, j, ...].sum(axis=(1, 2, 3))
                    self.Y[:, i, j, :] = summed + self.B
                else:
                    window = X[:, top:top + filt_h, left:left + filt_w, :]
                    contracted = np.tensordot(window,
                                              self.W,
                                              axes=([1, 2, 3], [0, 1, 2]))
                    self.Y[:, i, j, :] = contracted + self.B

        # debug dumps: the stored input, the passed input, and the output
        input_check = '../r_array/conv_input({},{},{}).npy'.format(
            *self.X.shape[:3])
        data_io.write(self.X, input_check)

        kind_check = '../r_array/conv_check_input({},{},{}).npy'.format(
            *X.shape[:3])
        data_io.write(X, kind_check)

        output_check = '../r_array/conv_output({},{},{}).npy'.format(
            *self.Y.shape[:3])
        data_io.write(self.Y, output_check)
        return self.Y
Пример #29
0
        raise ValueError("Wrong set type, should be `train` or `test`!")
    # when the task is binary.classification or regression, transform it to multilabel
    if task == 'regression':
        labels = regression_to_multilabel(labels)
    elif task == 'binary.classification':
        labels = binary_to_multilabel(labels)
    return features, labels


if __name__ == '__main__':
    # For each listed dataset: load it, fit an auto-sklearn classifier on the
    # training split and write the predicted test probabilities to
    # <output_dir>/<dataset_name>.predict.
    input_dir = '../../../autodl-contrib/raw_datasets/automl'
    output_dir = '../'
    for dataset_name in ('dorothea', 'adult'):
        D = DataManager(dataset_name, input_dir,
                        replace_missing=False, verbose=verbose)

        # The test split is prepared before the training split.
        X_test, Y_test = _prepare_metadata_features_and_labels(
            D, set_type='test')
        X_train, Y_train = _prepare_metadata_features_and_labels(
            D, set_type='train')
        print(Y_test.shape)

        # Total budget in seconds; a single run may use a tenth of it.
        time_budget = 7200
        model = AutoSklearnClassifier(
            time_left_for_this_task=time_budget,
            per_run_time_limit=time_budget // 10,
        )
        model.fit(X_train, Y_train)

        predict_path = os.path.join(output_dir, dataset_name + '.predict')
        Y_hat_test = model.predict_proba(X_test)
        print(Y_hat_test.shape)
        data_io.write(predict_path, Y_hat_test)
Пример #30
0
    time_spent = time.time() - start
    vprint( verbose,  "time spent %5.2f sec" %time_spent)

    vprint( verbose,  "======== Creating model ==========")
    train_data = D.data['X_train']
    labels = D.data['Y_train']
    valid_data = D.data['X_valid']
    test_data = D.data['X_test']
    print (train_data.shape)
    print (valid_data.shape)
    print (test_data.shape)
    print (labels.shape)
    time_spent = 0                   # Initialize time spent learning
    #if basename in ["albert","dilbert","fabert","robert","volkert"]:
    (Y_valid, Y_test) = locals()[basename+"_predict"](train_data,labels, valid_data, test_data,output_dir, D.info['time_budget'],D.info['target_num'],D.info['is_sparse'])
    time_spent = time.time() - start

    vprint( verbose,  "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start))
    # Write results
    filename_valid = basename + '_valid_' + '.predict'
    data_io.write(os.path.join(output_dir,filename_valid), Y_valid)
    filename_test = basename + '_test_' + '.predict'
    data_io.write(os.path.join(output_dir,filename_test), Y_test)

    vprint( verbose,  "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start))
    time_spent = time.time() - start

overall_time_spent = time.time() - overall_start
vprint( verbose,  "[+] Done")
vprint( verbose,  "[+] Overall time spent %5.2f sec " % overall_time_spent)
Пример #31
0
    #R = nn.lrp(ypred,'alphabeta',2)    #as Eq(60) from DOI: 10.1371/journal.pone.0130140

    #R = nn.lrp(Y[na,i]) #compute first layer relevance according to the true class label
    '''
    yselect = 3
    yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1.
    R = nn.lrp(yselect) #compute first layer relvance for an arbitrarily selected class
    '''

    #undo input normalization for digit drawing. get it back to range [0,1] per pixel
    x = (x + 1.) / 2.

    #render input and heatmap as rgb images
    digit = render.digit_to_rgb(x, scaling=3)
    hm = render.hm_to_rgb(R, X=x, scaling=3, sigma=2)
    digit_hm = render.save_image([digit, hm], '../heatmap.png')
    data_io.write(R, '../heatmap.npy')

    #display the image as written to file
    plt.imshow(digit_hm, interpolation='none')
    plt.axis('off')
    plt.show()

#note that modules.Sequential allows for batch processing inputs
'''
x = X[:10,:]
y = nn.forward(x)
R = nn.lrp(y)
data_io.write(R,'../Rbatch.npy')
'''
Пример #32
0
                break
            else: # Check if the prediction has good shape
                prediction_shape = tuple(Y_pred.shape)
                if prediction_shape != correct_prediction_shape:
                    raise BadPredictionShapeError(
                        "Bad prediction shape! Expected {} but got {}."\
                        .format(correct_prediction_shape, prediction_shape)
                    )
            # Write timestamp to 'start.txt'
            write_timestamp(output_dir, predict_idx=prediction_order_number,
                            timestamp=timer.exec)
            # Prediction files: adult.predict_0, adult.predict_1, ...
            filename_test = basename[:-5] + '.predict_' +\
                str(prediction_order_number)
            # Write predictions to output_dir
            data_io.write(os.path.join(output_dir,filename_test), Y_pred)
            prediction_order_number += 1
            logger.info("[+] {0:d} predictions made, time spent so far {1:.2f} sec"\
                        .format(prediction_order_number, time.time() - start))
            logger.info("[+] Time left {0:.2f} sec".format(timer.remain))
    except TimeoutException as e:
        logger.info("[-] Ingestion program exceeded time budget. Predictions "
                    "made so far will be used for evaluation.")
    except Exception as e:
        ingestion_success = False
        logger.info("Failed to run ingestion.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)

    # Finishing ingestion program
    end_time = time.time()
    overall_time_spent = end_time - start
Пример #33
0
def predict(LD, output_dir, basename):
    """Train a small Lasagne MLP on SVD-reduced features and write predictions.

    Pipeline, in order:

    1. Shuffle the training set (the shuffled arrays are stored back into
       ``LD.data['X_train']`` / ``LD.data['Y_train']``).
    2. Reduce all three feature matrices to 400 components with a
       ``TruncatedSVD`` fitted on the training data, then pass them through
       scikit-learn's ``Normalizer``.
    3. Train a network with two 600-unit rectifier layers and a sigmoid output
       layer (one unit per column of ``Y_train``) for 20 epochs, minimising
       mean squared error with Nesterov momentum.
    4. Predict the validation and test sets in chunks of 1000 rows, clip the
       result to ``[eps, 1 - eps]``, round to 4 decimals and write
       ``<basename>_valid_000.predict`` and ``<basename>_test_000.predict``
       into ``output_dir`` through ``data_io.write``.

    Parameters
    ----------
    LD
        Data holder exposing ``data`` (mapping with ``X_train``, ``Y_train``,
        ``X_valid``, ``X_test``) and ``info`` (mapping with ``target_num``).
    output_dir : str
        Directory the two prediction files are written to.
    basename : str
        Prefix of the prediction file names.
    """
    import os
    import random

    import numpy as np
    from sklearn import decomposition, preprocessing
    from sklearn.utils import shuffle

    import lasagne
    import theano
    import theano.tensor as T

    # Unused below; kept only in case the project module has import-time side
    # effects -- TODO confirm and drop.
    import data_converter  # noqa: F401
    import data_io

    # The previously imported-but-unused names (notably
    # ``sklearn.externals.joblib``, which no longer exists in recent
    # scikit-learn releases) were removed so the function keeps importing.

    np.random.seed(0)
    random.seed(0)

    LD.data['X_train'], LD.data['Y_train'] = shuffle(
        LD.data['X_train'], LD.data['Y_train'], random_state=1)
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']

    # Dimensionality reduction fitted on the training split only.
    fs = decomposition.TruncatedSVD(n_components=400, n_iter=5, random_state=1)
    fs.fit(X_train)
    X_train = fs.transform(X_train)
    X_valid = fs.transform(X_valid)
    X_test = fs.transform(X_test)

    normx = preprocessing.Normalizer()
    normx.fit(X_train)
    X_train = normx.transform(X_train)
    X_valid = normx.transform(X_valid)
    X_test = normx.transform(X_test)

    y_train = np.copy(LD.data['Y_train'])

    def batches(X, y, csize, rs):
        """Yield float32 mini-batches of ``csize`` rows; the remainder is dropped."""
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, X.shape[0] - csize + 1, csize):
            Xc = np.float32(X[cstart:cstart + csize])
            yc = np.float32(y[cstart:cstart + csize])
            yield Xc, yc

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')

    # NOTE(review): an input layer has no weights or nonlinearity; the
    # ``nonlinearity``/``W`` keywords look like they are simply ignored by
    # Lasagne -- confirm before removing them.
    l_in = lasagne.layers.InputLayer(
        shape=(None, X_train.shape[1]),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Sparse(),
        input_var=input_var)

    l_hid1 = lasagne.layers.DenseLayer(
        l_in, num_units=600,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Sparse())

    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1, num_units=600,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Sparse())

    Lnum_out_units = y_train.shape[1]

    l_out = lasagne.layers.DenseLayer(
        l_hid2, num_units=Lnum_out_units,
        nonlinearity=lasagne.nonlinearities.sigmoid)

    network = l_out

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var).mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.3, momentum=0.90)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # NOTE(review): the batch size equals ``epoch + 1`` (1, 2, ..., 20) and the
    # epoch number doubles as the shuffle seed -- confirm this is intended.
    for epoch in range(20):
        for Xt, yt in batches(X_train, y_train, epoch + 1, epoch):
            train_fn(Xt, yt)

    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)

    def predict_in_chunks(X, csize=1000):
        """Run the trained network over ``X`` in slices of ``csize`` rows."""
        preds = np.zeros((X.shape[0], y_train.shape[1]))
        for cstart in range(0, X.shape[0], csize):
            preds[cstart:cstart + csize] = f2(np.float32(X[cstart:cstart + csize]))
        return preds

    preds_valid = predict_in_chunks(X_valid)
    preds_test = predict_in_chunks(X_test)

    # Single-target problems keep only column 1 of the network output
    # (presumably the positive-class column -- this requires Y_train to have at
    # least two columns; verify against the caller).
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]

    # Keep predictions strictly inside (0, 1) and limit them to 4 decimals.
    eps = 0.0001
    preds_valid = np.round(np.clip(preds_valid, 0 + eps, 1 - eps), 4)
    preds_test = np.round(np.clip(preds_test, 0 + eps, 1 - eps), 4)

    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_test), preds_test)

	
	
Пример #34
0
def blender(sd, srd, srf, src, Nworkers, stop_writing, output_dir, basename,
            Lstart, Ltime_budget, train_split, test_split):
    """Poll finished models forever and write the best single/blended predictions.

    Every 0.5 s one pass collects the finished entries from the shared
    objects, scores the top-ranked one on ``sd.yt_raw[test_split:]``, tries
    pairwise and three-way blends of the leading candidates, and rewrites the
    ``.predict`` files in ``output_dir`` whenever the best score improves.
    The loop has no exit condition; an exception inside a pass is printed and
    the next pass starts over.

    Parameters
    ----------
    sd
        Shared object exposing ``worker0`` .. ``worker<Nworkers-1>``,
        ``yt_raw`` and ``LD`` (whose ``info`` mapping holds ``metric``,
        ``task`` and ``target_num``).
    srd
        Shared object exposing ``raw_model`` and ``raw_model1`` ..
        ``raw_model4``.
    srf, src
        Shared objects, each exposing ``model1``.
    Nworkers : int
        Number of ``sd.worker<N>`` entries to poll.
    stop_writing
        Event-like object; once ``is_set()`` is true the blended result is no
        longer written.
    output_dir, basename : str
        Files are written as
        ``<output_dir>/<basename>_{valid,test}_<cycle>.predict``.
    Lstart, Ltime_budget, train_split
        Not used by this function.
    test_split
        Slice start of the part of ``sd.yt_raw`` used for scoring.

    Every model entry is a mapping with the keys ``done``, ``score``,
    ``preds_valid``, ``preds_test`` and ``preds_2fld``; worker entries also
    carry ``blend_group``.
    """

    def write_predictions(cycle, info, preds_valid, preds_test):
        """Post-process the predictions for the task and write both files."""
        if info['task'] != 'regression':
            if info['target_num'] == 1:
                # Single-target classification: keep column 1 only.
                preds_valid = preds_valid[:, 1]
                preds_test = preds_test[:, 1]
            preds_valid = np.clip(preds_valid, 0, 1)
            preds_test = np.clip(preds_test, 0, 1)
        filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
        data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
        filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
        data_io.write(os.path.join(output_dir, filename_test), preds_test)

    try:
        cycle = 0  # never incremented, so the same two files are overwritten
        best_score = -2
        atbest = -2  # best score whose predictions were written so far

        while 1:
            try:
                time.sleep(0.5)

                # Finished workers; keep the two best of every blend group.
                finished = []
                for wr_no in range(Nworkers):
                    # getattr replaces the former exec("wr_data = sd.workerN"),
                    # which cannot bind a function local on Python 3.
                    wr_data = getattr(sd, 'worker' + str(wr_no))
                    if wr_data['done'] > 0:
                        finished.append(wr_data)

                workers_data = []
                wgroups = [w['blend_group'] for w in finished]
                for group in np.unique(wgroups):
                    ranked = sorted(
                        (w for w in finished if w['blend_group'] == group),
                        key=itemgetter('score'), reverse=True)
                    workers_data.extend(ranked[:2])

                # Finished "raw" models; keep at most the six best.
                raw_models = (
                    srd.raw_model, srd.raw_model1, srd.raw_model2,
                    srd.raw_model3, srd.raw_model4, srf.model1, src.model1,
                )
                workers_data_raw = [m for m in raw_models if m['done'] == 1]
                workers_data_raw.sort(key=itemgetter('score'), reverse=True)
                workers_data.extend(workers_data_raw[:6])

                workers_data = sorted(workers_data, key=itemgetter('score'),
                                      reverse=True)

                if workers_data:
                    info = sd.LD.info
                    metric = info['metric']
                    task = info['task']

                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']

                    y = sd.yt_raw[test_split:]
                    x = worker0['preds_2fld']

                    s0 = getattr(libscores, metric)(y, x, task)
                    if task != 'regression' and s0 <= 0:
                        # Non-positive classification scores get a small AUC
                        # bonus.  The previous code passed an undefined name
                        # (``preds``) here, so the bonus was never applied.
                        try:
                            s0 += libscores.auc_metric(y, x, task) / 10
                        except Exception:
                            pass  # best effort: keep the plain metric value
                    best_score = s0

                    # Write the best single model right away (not gated by
                    # ``stop_writing``, as before).
                    if s0 > atbest:
                        atbest = best_score
                        try:
                            write_predictions(cycle, info, preds_valid, preds_test)
                        except Exception as e:
                            print('exception in blender early write' + '     ' + str(e))

                    Lsample = 4
                    Lssample = Lsample - 1

                    # Pairwise blends of the top ``Lsample`` candidates.
                    for w0, w1 in itertools.combinations(workers_data[:Lsample], 2):
                        s01, validt, testt = blend2(
                            w0['preds_2fld'], w1['preds_2fld'], y, metric, task,
                            w0['preds_valid'], w1['preds_valid'],
                            w0['preds_test'], w1['preds_test'])
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt

                    # Three-way blends of the top ``Lssample`` candidates.
                    for w0, w1, w2 in itertools.combinations(workers_data[:Lssample], 3):
                        s012, validt, testt = blend3(
                            w0['preds_2fld'], w1['preds_2fld'], w2['preds_2fld'],
                            y, metric, task,
                            w0['preds_valid'], w1['preds_valid'], w2['preds_valid'],
                            w0['preds_test'], w1['preds_test'], w2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt

                    if not stop_writing.is_set() and best_score > atbest:
                        atbest = best_score
                        write_predictions(cycle, info, preds_valid, preds_test)
                        # cycle += 1

            except Exception as e:
                print('exception in blender process' + '     ' + str(e))
                # in case of any problem, let's try again
    except Exception as e:
        print('exception in blender main process' + '     ' + str(e))
Пример #35
0
    yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1.
    R = nn.lrp(ypred*yselect) #compute first layer relvance for an arbitrarily selected class
    '''

    #undo input normalization for digit drawing. get it back to range [0,1] per pixel
    x = (x + 1.) / 2.

    if not np == numpy:  # np=cupy
        x = np.asnumpy(x)
        R = np.asnumpy(R)

    #render input and heatmap as rgb images
    digit = render.digit_to_rgb(x, scaling=3)
    hm = render.hm_to_rgb(R, X=x, scaling=3, sigma=2)
    digit_hm = render.save_image([digit, hm], '../heatmap.png')
    data_io.write(R, '../heatmap.npy')

    #display the image as written to file
    plt.imshow(digit_hm, interpolation='none')
    plt.axis('off')
    plt.show()

#note that modules.Sequential allows for batch processing inputs
if True:
    N = 256
    t_start = time.time()
    x = X[:N, ...]
    y = nn.forward(x)
    R = nn.lrp(y)
    data_io.write(R, '../Rbatch.npy')
    print('Computation of {} heatmaps using {} in {:.3f}s'.format(
Пример #36
0
def predict(LD, output_dir, basename):
    """Train a small Lasagne regression MLP and write its predictions.

    Pipeline, in order:

    1. Shuffle the training set (the shuffled arrays are stored back into
       ``LD.data['X_train']`` / ``LD.data['Y_train']``).
    2. Take the first 2000 columns of each feature matrix and densify them
       with ``.toarray()`` (so the inputs must provide that method).
    3. Build the features as the first 200 dense columns stacked with 100 PCA
       components (PCA fitted on the 2000 training columns), standardise with
       a ``StandardScaler`` fitted on the training features and cast to
       float32.
    4. Train a 100-unit sigmoid / 40-unit tanh / 1-unit linear network for
       1200 epochs with batch size 100, minimising mean squared error with SGD
       on gradients rescaled by ``total_norm_constraint``.
    5. Write the flattened validation and test predictions to
       ``<basename>_valid_000.predict`` and ``<basename>_test_000.predict`` in
       ``output_dir`` through ``data_io.write``.

    Parameters
    ----------
    LD
        Data holder exposing ``data`` (mapping with ``X_train``, ``Y_train``,
        ``X_valid``, ``X_test``).
    output_dir : str
        Directory the two prediction files are written to.
    basename : str
        Prefix of the prediction file names.
    """
    import os
    import numpy as np
    import random
    # Seed before anything else is imported, as in the original ordering.
    np.random.seed(0)
    random.seed(0)
    # Unused below; kept only in case the project module has import-time side
    # effects -- TODO confirm and drop.
    import data_converter  # noqa: F401
    from sklearn import preprocessing, decomposition
    from sklearn.utils import shuffle

    from lasagne.updates import total_norm_constraint
    import lasagne
    import theano
    import theano.tensor as T

    import data_io

    # The previously imported-but-unused names (notably
    # ``sklearn.externals.joblib``, which no longer exists in recent
    # scikit-learn releases) were removed so the function keeps importing.

    LD.data['X_train'], LD.data['Y_train'] = shuffle(
        LD.data['X_train'], LD.data['Y_train'], random_state=1)

    def densify(X):
        """Return the first 2000 columns of ``X`` as a dense array."""
        return X[:, 0:2000].toarray()

    X_train = densify(LD.data['X_train'])
    X_valid = densify(LD.data['X_valid'])
    X_test = densify(LD.data['X_test'])

    fs = decomposition.PCA(n_components=100)
    fs.fit(X_train)

    def build_features(X):
        """Stack the first 200 raw columns (float32) with the PCA projection."""
        return np.hstack([np.float32(X[:, 0:200]), fs.transform(X)])

    X_train = build_features(X_train)
    X_valid = build_features(X_valid)
    X_test = build_features(X_test)

    normx = preprocessing.StandardScaler()
    normx.fit(X_train)
    X_train = np.float32(normx.transform(X_train))
    X_valid = np.float32(normx.transform(X_valid))
    X_test = np.float32(normx.transform(X_test))

    # Progress marker; the single-argument call form prints the same text on
    # Python 2 and Python 3.
    print("p5")

    # Targets as a float32 column vector to match the 1-unit output layer.
    y_train = np.float32(np.copy(LD.data['Y_train'])).reshape((-1, 1))

    def batches(X, y, csize, rs):
        """Yield shuffled mini-batches of ``csize`` rows; the remainder is dropped."""
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, X.shape[0] - csize + 1, csize):
            yield X[cstart:cstart + csize], y[cstart:cstart + csize]

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')

    # NOTE(review): an input layer has no weights or nonlinearity; the
    # ``nonlinearity``/``W`` keywords look like they are simply ignored by
    # Lasagne -- confirm before removing them.
    l_in = lasagne.layers.InputLayer(
        shape=(None, X_train.shape[1]),
        input_var=input_var,
        nonlinearity=None,
        W=lasagne.init.Sparse())

    l_hid1 = lasagne.layers.DenseLayer(
        l_in, num_units=100,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.Sparse())

    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1, num_units=40,
        nonlinearity=lasagne.nonlinearities.tanh,
        W=lasagne.init.GlorotUniform())

    Lnum_out_units = 1

    l_out = lasagne.layers.DenseLayer(
        l_hid2, num_units=Lnum_out_units,
        nonlinearity=None)

    network = l_out

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var).mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    all_grads = T.grad(loss, params)
    scaled_grads = total_norm_constraint(all_grads, 100)
    updates = lasagne.updates.sgd(scaled_grads, params, learning_rate=0.001)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # The epoch number doubles as the shuffle seed for that epoch's batches.
    for epoch in range(1200):
        for Xt, yt in batches(X_train, y_train, 100, epoch):
            train_fn(Xt, yt)

    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)
    preds_valid = f2(X_valid).ravel()
    preds_test = f2(X_test).ravel()

    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir, filename_test), preds_test)
Пример #37
0
def blender(sd, srd, srf, src, Nworkers, stop_writing, output_dir, basename,
            Lstart, Ltime_budget, train_split, test_split):
    """Poll finished models forever and write the best single/blended predictions.

    Every 0.5 s one pass collects the finished entries from the shared
    objects, scores the top-ranked one on ``sd.yt_raw[test_split:]``, tries
    pairwise and three-way blends of the leading candidates, and rewrites the
    ``.predict`` files in ``output_dir`` whenever the best score improves.
    The loop has no exit condition; an exception inside a pass is printed and
    the next pass starts over.

    Parameters
    ----------
    sd
        Shared object exposing ``worker0`` .. ``worker<Nworkers-1>``,
        ``yt_raw`` and ``LD`` (whose ``info`` mapping holds ``metric``,
        ``task`` and ``target_num``).
    srd
        Shared object exposing ``raw_model`` and ``raw_model1`` ..
        ``raw_model4``.
    srf, src
        Shared objects, each exposing ``model1``.
    Nworkers : int
        Number of ``sd.worker<N>`` entries to poll.
    stop_writing
        Event-like object; once ``is_set()`` is true the blended result is no
        longer written.
    output_dir, basename : str
        Files are written as
        ``<output_dir>/<basename>_{valid,test}_<cycle>.predict``.
    Lstart, Ltime_budget, train_split
        Not used by this function.
    test_split
        Slice start of the part of ``sd.yt_raw`` used for scoring.

    Every model entry is a mapping with the keys ``done``, ``score``,
    ``preds_valid``, ``preds_test`` and ``preds_2fld``; worker entries also
    carry ``blend_group``.
    """

    def write_predictions(cycle, info, preds_valid, preds_test):
        """Post-process the predictions for the task and write both files."""
        if info['task'] != 'regression':
            if info['target_num'] == 1:
                # Single-target classification: keep column 1 only.
                preds_valid = preds_valid[:, 1]
                preds_test = preds_test[:, 1]
            preds_valid = np.clip(preds_valid, 0, 1)
            preds_test = np.clip(preds_test, 0, 1)
        filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
        data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
        filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
        data_io.write(os.path.join(output_dir, filename_test), preds_test)

    try:
        cycle = 0  # never incremented, so the same two files are overwritten
        best_score = -2
        atbest = -2  # best score whose predictions were written so far

        while 1:
            try:
                time.sleep(0.5)

                # Finished workers; keep the two best of every blend group.
                finished = []
                for wr_no in range(Nworkers):
                    # getattr replaces the former exec("wr_data = sd.workerN"),
                    # which cannot bind a function local on Python 3.
                    wr_data = getattr(sd, 'worker' + str(wr_no))
                    if wr_data['done'] > 0:
                        finished.append(wr_data)

                workers_data = []
                wgroups = [w['blend_group'] for w in finished]
                for group in np.unique(wgroups):
                    ranked = sorted(
                        (w for w in finished if w['blend_group'] == group),
                        key=itemgetter('score'), reverse=True)
                    workers_data.extend(ranked[:2])

                # Finished "raw" models; keep at most the six best.
                raw_models = (
                    srd.raw_model, srd.raw_model1, srd.raw_model2,
                    srd.raw_model3, srd.raw_model4, srf.model1, src.model1,
                )
                workers_data_raw = [m for m in raw_models if m['done'] == 1]
                workers_data_raw.sort(key=itemgetter('score'), reverse=True)
                workers_data.extend(workers_data_raw[:6])

                workers_data = sorted(workers_data, key=itemgetter('score'),
                                      reverse=True)

                if workers_data:
                    info = sd.LD.info
                    metric = info['metric']
                    task = info['task']

                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']

                    y = sd.yt_raw[test_split:]
                    x = worker0['preds_2fld']

                    s0 = getattr(libscores, metric)(y, x, task)
                    if task != 'regression' and s0 <= 0:
                        # Non-positive classification scores get a small AUC
                        # bonus.  The previous code passed an undefined name
                        # (``preds``) here, so the bonus was never applied.
                        try:
                            s0 += libscores.auc_metric(y, x, task) / 10
                        except Exception:
                            pass  # best effort: keep the plain metric value
                    best_score = s0

                    # Write the best single model right away (not gated by
                    # ``stop_writing``, as before).
                    if s0 > atbest:
                        atbest = best_score
                        try:
                            write_predictions(cycle, info, preds_valid, preds_test)
                        except Exception as e:
                            print('exception in blender early write' + '     ' + str(e))

                    Lsample = 4
                    Lssample = Lsample - 1

                    # Pairwise blends of the top ``Lsample`` candidates.
                    for w0, w1 in itertools.combinations(workers_data[:Lsample], 2):
                        s01, validt, testt = blend2(
                            w0['preds_2fld'], w1['preds_2fld'], y, metric, task,
                            w0['preds_valid'], w1['preds_valid'],
                            w0['preds_test'], w1['preds_test'])
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt

                    # Three-way blends of the top ``Lssample`` candidates.
                    for w0, w1, w2 in itertools.combinations(workers_data[:Lssample], 3):
                        s012, validt, testt = blend3(
                            w0['preds_2fld'], w1['preds_2fld'], w2['preds_2fld'],
                            y, metric, task,
                            w0['preds_valid'], w1['preds_valid'], w2['preds_valid'],
                            w0['preds_test'], w1['preds_test'], w2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt

                    if not stop_writing.is_set() and best_score > atbest:
                        atbest = best_score
                        write_predictions(cycle, info, preds_valid, preds_test)
                        # cycle += 1

            except Exception as e:
                print('exception in blender process' + '     ' + str(e))
                # in case of any problem, let's try again
    except Exception as e:
        print('exception in blender main process' + '     ' + str(e))
Пример #38
0
def blender(sd, srd, Nworkers, stop_writing, output_dir, basename, Lstart, Ltime_budget, Lfold):
    """Poll finished models forever and write numbered best/blended predictions.

    Every 0.5 s one pass collects the finished entries from the shared
    objects, scores the top-ranked one, tries pairwise and three-way blends of
    the leading candidates and, when the best score improves, writes a new
    pair of ``.predict`` files numbered by ``cycle``.  The loop has no exit
    condition; an exception inside a pass is printed and the next pass starts
    over.

    Parameters
    ----------
    sd
        Shared object exposing ``worker0`` .. ``worker<Nworkers-1>``,
        ``yt_raw`` and ``LD`` (``LD.data["Y_train"]`` and an ``LD.info``
        mapping holding ``metric`` and ``target_num``).
    srd
        Shared object exposing ``raw_model`` and ``raw_model1`` ..
        ``raw_model4``.
    Nworkers : int
        Number of ``sd.worker<N>`` entries to poll.
    stop_writing
        Event-like object; once ``is_set()`` is true the blended result is no
        longer written.
    output_dir, basename : str
        Files are written as
        ``<output_dir>/<basename>_{valid,test}_<cycle>.predict``.
    Lstart, Ltime_budget
        Start time and time budget; a pass is skipped while ``cycle`` is ahead
        of ``elapsed / Ltime_budget * 100``.
    Lfold
        If greater than 1 the whole ``sd.yt_raw`` is the scoring target,
        otherwise only its second half; it also selects how many candidates
        are blended (4 when below 4, else 6).

    Every model entry is a mapping with the keys ``done``, ``score``,
    ``preds_valid``, ``preds_test`` and ``preds_2fld``; worker entries also
    carry ``blend_group``.
    """

    def write_predictions(cycle, single_target, preds_valid, preds_test):
        """Slice/clip the predictions and write the two files for ``cycle``.

        Works on its own local names so the caller's arrays keep their
        original shape.
        """
        if single_target:
            preds_valid = preds_valid[:, 1]
            preds_test = preds_test[:, 1]
        preds_valid = np.clip(preds_valid, 0, 1)
        preds_test = np.clip(preds_test, 0, 1)
        filename_valid = basename + "_valid_" + str(cycle).zfill(3) + ".predict"
        data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
        filename_test = basename + "_test_" + str(cycle).zfill(3) + ".predict"
        data_io.write(os.path.join(output_dir, filename_test), preds_test)

    try:
        split = int(len(sd.LD.data["Y_train"]) * 0.5)
        cycle = 1  # cycle 0 is all zeros
        best_score = 0
        atbest = 0  # score threshold the next written result has to beat

        while 1:
            try:
                time.sleep(0.5)
                # limit to 100 predictions
                if cycle > (time.time() - Lstart) / Ltime_budget * 100:
                    time.sleep(1)
                    continue

                # Finished workers; keep the two best of every blend group.
                finished = []
                for wr_no in range(Nworkers):
                    # getattr replaces the former exec("wr_data = sd.workerN"),
                    # which cannot bind a function local on Python 3.
                    wr_data = getattr(sd, "worker" + str(wr_no))
                    if wr_data["done"] > 0:
                        finished.append(wr_data)

                workers_data = []
                wgroups = [w["blend_group"] for w in finished]
                for group in np.unique(wgroups):
                    twdata = sorted(
                        (w for w in finished if w["blend_group"] == group),
                        key=itemgetter("score"), reverse=True)
                    workers_data.extend(twdata[:2])
                    # Same text as the former multi-argument print statement.
                    print("%s %s %s" % (group, len(twdata), len(workers_data)))

                # this is patch for codalab VM
                # Finished "raw" models; keep at most the three best.
                raw_models = (
                    srd.raw_model, srd.raw_model1, srd.raw_model2,
                    srd.raw_model3, srd.raw_model4,
                )
                workers_data_raw = [m for m in raw_models if m["done"] == 1]
                workers_data_raw.sort(key=itemgetter("score"), reverse=True)
                workers_data.extend(workers_data_raw[:3])

                workers_data = sorted(workers_data, key=itemgetter("score"), reverse=True)

                if workers_data:
                    info = sd.LD.info
                    metric = info["metric"]
                    single_target = info["target_num"] == 1

                    worker0 = workers_data[0]
                    preds_valid = worker0["preds_valid"]
                    preds_test = worker0["preds_test"]

                    y = sd.yt_raw[split:]
                    if Lfold > 1:
                        y = sd.yt_raw

                    x = worker0["preds_2fld"]

                    s0 = getattr(libscores, metric)(y, x)
                    best_score = s0

                    # short run can't wait for blend (usable only for AutoML 1)
                    if s0 > atbest and cycle < 2:
                        atbest = best_score * 0.9  # not reliable score
                        try:
                            # The helper slices copies of the names, so the
                            # later write no longer re-slices 1-D arrays
                            # (which used to raise IndexError).
                            write_predictions(cycle, single_target, preds_valid, preds_test)
                            cycle += 1
                        except Exception as e:
                            print("exception in blender early write" + "     " + str(e))

                    if Lfold < 4:
                        Lsample = 4
                    else:
                        Lsample = 6
                    Lssample = Lsample - 1

                    # Pairwise blends of the top ``Lsample`` candidates.
                    for w0, w1 in itertools.combinations(workers_data[:Lsample], 2):
                        s01, validt, testt = blend2(
                            w0["preds_2fld"],
                            w1["preds_2fld"],
                            y,
                            metric,
                            w0["preds_valid"],
                            w1["preds_valid"],
                            w0["preds_test"],
                            w1["preds_test"],
                        )
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt

                    # Three-way blends of the top ``Lssample`` candidates.
                    for w0, w1, w2 in itertools.combinations(workers_data[:Lssample], 3):
                        s012, validt, testt = blend3(
                            w0["preds_2fld"],
                            w1["preds_2fld"],
                            w2["preds_2fld"],
                            y,
                            metric,
                            w0["preds_valid"],
                            w1["preds_valid"],
                            w2["preds_valid"],
                            w0["preds_test"],
                            w1["preds_test"],
                            w2["preds_test"],
                        )
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt

                    if not stop_writing.is_set():  # until last 10 seconds (event signal)
                        if best_score > atbest:
                            atbest = best_score
                            print("naj = %s %s %s" % (workers_data[0]["score"], best_score, atbest))
                            write_predictions(cycle, single_target, preds_valid, preds_test)
                            cycle += 1
                    else:
                        print("stop writing is set")

            except Exception as e:
                print("exception in blender process" + "     " + str(e))
                # in case of any problem, let's try again
    except Exception as e:
        print("exception in blender main process" + "     " + str(e))
Пример #39
0
    
    '''
    R = nn.lrp(Y[na,i]) #compute first layer relevance according to the true class label
    '''
    
    '''
	yselect = 3
    yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1. 
    R = nn.lrp(yselect) #compute first layer relvance for an arbitrarily selected class 
    '''
    
    #render input and heatmap as rgb images
    digit = render.digit_to_rgb(x, scaling = 3)
    hm = render.hm_to_rgb(R, X = x, scaling = 3, sigma = 2)
    digit_hm = render.save_image([digit,hm],'../heatmap.png')
    data_io.write(R,'../heatmap.npy')
    
    #display the image as written to file
    plt.imshow(digit_hm, interpolation = 'none')
    plt.axis('off')
    plt.show()


#note that modules.Sequential allows for batch processing inputs
'''
x = X[:10,:]
y = nn.forward(x)
R = nn.lrp(y)
data_io.write(R,'../Rbatch.npy')
'''
Пример #40
0
    def lrp(self,R,lrp_var=None,param=None):
        """
        Perform LRP by calling each module's lrp() in reverse order.

        The decomposition used depends on lrp_var and param, or on preset
        values specified via Module.set_lrp_parameters(lrp_var,lrp_param).

        If lrp parameters have been pre-specified (per layer), the corresponding
        decomposition will be applied during a call of lrp().

        Specifying lrp parameters explicitly when calling lrp(), e.g.
        net.lrp(R,lrp_var='alpha',param=2.), will override the preset values
        for the current call.

        How to use:

        net.forward(X) # forward feed some data you wish to explain, to populate the net.
        # (first run the prediction through the network's forward pass)

        then either:

        net.lrp(R) # to perform the naive approach to lrp implemented in _simple_lrp for each layer

        or:

        for m in net.modules:
            m.set_lrp_parameters(...)
        net.lrp(R) # to apply the lrp configuration preset on each layer in the net

        or:

        net.lrp(R,somevariantname,someparameter) # to explicitly call the specified
        parametrization for all layers (where applicable) and override any preset
        configurations.

        Parameters
        ----------
        R : numpy.ndarray
            final layer relevance values, usually the network's prediction of
            some data points for which the output relevance is to be computed.
            dimensionality should be equal to the previously computed predictions

        lrp_var : str
            either 'none' or 'simple' or None for standard Lrp ,
            'epsilon' for an added epsilon slack in the denominator
            'alphabeta' or 'alpha' for weighting positive and negative contributions
            separately. param specifies alpha with alpha + beta = 1
            'flat' projects an upper layer neuron's relevance uniformly over its
            receptive field.
            'ww' or 'w^2' only considers the square weights w_ij^2 as quantities
            to distribute relevances with.

        param : double
            the respective parameter for the lrp method of choice

        Returns
        -------

        R : numpy.ndarray
            the first layer relevances as produced by the neural net wrt the
            previously forward passed input data. dimensionality is equal to the
            input data previously entered into forward

        Note
        ----

        Requires the net to be populated with temporary variables, i.e. forward
        needed to be called with the input for which the explanation is to be
        computed. calling clean in between forward and lrp invalidates the
        temporary data

        Side effects: after each module, if the intermediate relevance matches
        one of a few hard-coded shapes it is passed to data_io.write with a
        fixed path under '../r_array/'; the final relevance is always passed to
        data_io.write with '../r_array/sequential.npy'.
        """

        # Dump names for relevances with R.shape[0] == 1, keyed on R.shape[1].
        single_sample_tags = {32: "norm", 28: "first", 14: "sec", 5: "thrd"}

        for m in self.modules[::-1]:
            R = m.lrp(R,lrp_var,param)

            # Pick the dump name (if any) from the shape of the relevance just
            # produced. The rank is checked before indexing so that 1-D
            # relevances, or 2-D/3-D ones with shape[1] == 1, no longer raise
            # IndexError on shape[1] / shape[3].
            shape = R.shape
            tag = None
            if len(shape) >= 2:
                if shape[0] == 1 and shape[1] in single_sample_tags:
                    tag = single_sample_tags[shape[1]]
                elif len(shape) >= 4 and shape[1] == 1 and shape[3] == 100:
                    tag = "foth"

            if tag is not None:
                data_io.write(R, '../r_array/{0}.npy'.format(tag))

        data_io.write(R, '../r_array/sequential.npy')
        return R
Пример #41
0
         
         # Make predictions
         # -----------------
         Y_valid = M.predict_proba(D.data['X_valid'])
         Y_test = M.predict_proba(D.data['X_test'])                         
         vprint( verbose,  "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start))
         # Write results
         # -------------
         if overwrite_output:
             filename_valid = basename + '_valid.predict'                
             filename_test = basename + '_test.predict'
         else:
             filename_valid = basename + '_valid_' + the_date + '.predict'                
             filename_test = basename + '_test_' + the_date + '.predict' 
         vprint( verbose, "======== Saving results to: " + output_dir)
         data_io.write(os.path.join(output_dir,filename_valid), Y_valid)
         data_io.write(os.path.join(output_dir,filename_test), Y_test)
         vprint( verbose,  "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start))
         time_spent = time.time() - start 
         time_left_over = time_budget - time_spent
         vprint( verbose,  "[+] End cycle, time left %5.2f sec" % time_left_over)
         if time_left_over<=0: break
     # Clean up
     del D
     del M
     gc.collect()
         
 if zipme:
     vprint( verbose,  "========= Zipping this directory to prepare for submit ==============")
     data_io.zipdir(submission_filename + '.zip', ".")
     vprint( verbose,  "See: " + submission_filename + '.zip')
# Flat script: fit a model on the training split, write validation/test
# predictions, then zip the result directory.
# NOTE(review): `dataname` and `Model` are used below but are not defined in
# the visible portion of this script -- confirm where they come from.

# Directories added to the module search path below.
model_dir = 'sample_code_submission/'
problem_dir = 'ingestion_program/'
score_dir = 'scoring_program/'
from sys import path
# These must be appended before the project-local imports further down
# (data_manager, data_io), which presumably live in these directories.
path.append(model_dir)
path.append(problem_dir)
path.append(score_dir)
import numpy as np

# Load the dataset named `dataname` from datadir.
datadir = '../public_data'
from data_manager import DataManager
D = DataManager(dataname, datadir, replace_missing=True)

X_train = D.data['X_train']
Y_train = D.data['Y_train']

model = Model(time_left_for_this_task=1200)  # Change the time budget!!!!
model.fit(X_train, Y_train)

# Predict on the validation and test splits held by the data manager.
Y_hat_valid = model.predict(D.data['X_valid'])
Y_hat_test = model.predict(D.data['X_test'])

# Prediction files are named <dataname>_valid.predict / <dataname>_test.predict
# inside sample_result_submission/. Note that write takes the path first here.
result_name = 'sample_result_submission/' + dataname
from data_io import write
write(result_name + '_valid.predict', Y_hat_valid)
write(result_name + '_test.predict', Y_hat_test)

# Archive the result directory with the external `zip` tool; the exit status
# returned by call() is not checked.
from subprocess import call
call(["zip", "-rj", "autosklearn", "sample_result_submission/"])