def classicNeuralNetwork(self, features, labels, autoencoder=False):
    """Train a feedforward net (one sigmoid hidden layer of 20 units) with
    backprop and store it on self.neuralNetwork.

    features/labels: 2-D sample matrices (rows = samples) -- assumed; TODO confirm.
    autoencoder: when True, the net is trained to reconstruct `features`.
    """
    dataSet = SupervisedDataSet(features.shape[1], 1)
    dataSet.setField('input', features)
    if autoencoder: labels = features  # autoencoder mode: target == input
    dataSet.setField('target', labels)
    # NOTE(review): the dataset was declared with target width 1, but the
    # output layer below uses labels.shape[1] -- verify these agree for
    # non-scalar labels.
    tstdata, trndata = dataSet.splitWithProportion(0.25)
    print features.shape
    # NOTE(review): pybrain's stock buildNetwork takes layer SIZES; this
    # `_buildNetwork` variant takes tuples of layer instances -- confirm its API.
    simpleNeuralNetwork = _buildNetwork(\
        (LinearLayer(features.shape[1],'in'),),\
        (SigmoidLayer(20,'hidden0'),),\
        (LinearLayer(labels.shape[1],'out'),),\
        bias=True)
    trainer = BackpropTrainer(simpleNeuralNetwork, dataset=trndata, verbose=True)  #, momentum=0.1)
    trainer.trainUntilConvergence(maxEpochs=15)  # early-stops on validation error, capped at 15 epochs
    trnresult = percentError(trainer.testOnData(dataset=trndata), trndata['target'])
    tstresult = percentError(trainer.testOnData(dataset=tstdata), tstdata['target'])
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult
    self.neuralNetwork = simpleNeuralNetwork
def main():
    # End-to-end pipeline: load preprocessed features, build a sentiment-based
    # feature matrix, train a 4-40-1 pybrain net, and report sign-match accuracy.
    #read in pre-processed features
    print('reading preprocessed data')
    bag = read_bag_of_word('features')
    #read in sentimental dictionary
    print('reading dictionary')
    [word_vector, sentiments] = read_dictionary("positive.txt", "negative.txt")
    features,target,features_dict=create_feature_matrix(bag, sentiments)
    # Sort dates in order (round-trip through datetime so the sort is chronological).
    # NOTE(review): `dates` is never used below -- possibly leftover code.
    dates=dow_jones_labels.keys()
    dates = [datetime.datetime.strptime(ts, "%Y-%m-%d") for ts in dates]
    dates.sort()
    dates = [datetime.datetime.strftime(ts, "%Y-%m-%d") for ts in dates]
    ds = SupervisedDataSet(4, 1)  # 4 input features, 1 target
    ds.setField('input', features)
    target=np.array(target).reshape( -1, 1 )  # column vector expected by pybrain
    ds.setField('target', target)
    net = buildNetwork(4, 40, 1, bias=True)  # 40 hidden units
    trainer = BackpropTrainer(net, ds)
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=10000, continueEpochs=10)
    count=0
    for i in range(0,len(target)):
        print("predict={0},actual={1}".format(net.activate(features[i]),target[i]))
        if net.activate(features[i])*target[i]>0:  # prediction sign matches label sign
            count+=1
    # NOTE(review): denominator is len(dow_jones_labels), not len(target) --
    # confirm the two collections have the same length.
    print("accuracy={0}".format(float(count) / len(dow_jones_labels)))
def get_nn_dom_prediction(train_data, train_truth, test_data, test_truth, hidden=(5, ), weight_decay=0.0):
    """Fit a neural net on dominance-encoded genotypes and predict the test set.

    Targets are z-scored for training; predictions are mapped back to the
    original scale and returned as a flat array.
    """
    # Re-encode both matrices to capture dominance effects.
    train_data = _convert_to_individual_alleles(train_data)
    test_data = _convert_to_individual_alleles(test_data)

    truth_mean = np.mean(train_truth)
    truth_sd = np.std(train_truth)

    # Supervised training dataset with standardized targets.
    ds = SupervisedDataSet(train_data.shape[1], 1)
    ds.setField('input', train_data)
    ds.setField('target', (train_truth[:, np.newaxis] - truth_mean) / truth_sd)

    net = _get_nn(train_data.shape[1], hidden)
    _train_nn(net, ds, weight_decay)

    # Unsupervised dataset for activating the net on the held-out samples.
    test_ds = UnsupervisedDataSet(test_data.shape[1])
    test_ds.setField('sample', test_data)
    predicted = net.activateOnDataset(test_ds) * truth_sd + truth_mean
    return predicted.ravel()
def train(self, x, y): ''' Trains on the given inputs and labels for either a fixed number of epochs or until convergence. Normalizes the input with a z-transform''' print "training..." # normalize input m = x.mean() s = x.std() x = self.z_transform(x, m, s) ds = SupervisedDataSet(x.shape[1], 1) ds.setField('input', x) ds.setField('target', y) trainer = BackpropTrainer(self.n,ds, learningrate=self.learning_rate, momentum=self.momentum, verbose=True) if (self.epochs == 0): trainer.trainUntilConvergence() else: for i in range(0, self.epochs): start_time = time.time() trainer.train() print "epoch: ", i print "time: ", time.time() - start_time, " seconds" print "finished"
def _prepare_dataset(self, X, y, model_type):
    """
    Prepare data in pybrain format.

    :param pandas.DataFrame X: data of shape [n_samples, n_features]
    :param y: values for samples --- array-like of shape [n_samples]
    :param str model_type: classification or regression label
    :return: pybrain SupervisedDataSet
    """
    X, y, sample_weight = check_inputs(
        X, y, sample_weight=None, allow_none_weights=True,
        allow_multiple_targets=model_type == 'regression')
    X = self._transform_data(X, y, fit=not self._is_fitted())
    if model_type == 'classification':
        if not self._is_fitted():
            self._set_classes(y)
        # one column per class
        target = one_hot_transform(y, n_classes=len(self.classes_))
    elif model_type == 'regression':
        # promote a 1-D target to a single-column matrix; multi-target passes through
        target = y.reshape((len(y), 1)) if len(y.shape) == 1 else y
        if not self._is_fitted():
            self.n_targets = target.shape[1]
    else:
        raise ValueError('Wrong model type')
    dataset = SupervisedDataSet(X.shape[1], target.shape[1])
    dataset.setField('input', X)
    dataset.setField('target', target)
    return dataset
def _prepare_dataset(self, X, y, model_type):
    """Convert (X, y) into a pybrain SupervisedDataSet for the given model type."""
    multi_ok = (model_type == 'regression')
    X, y, sample_weight = check_inputs(X, y, sample_weight=None,
                                       allow_none_weights=True,
                                       allow_multiple_targets=multi_ok)
    X = self._transform_data(X, y, fit=not self.is_fitted())
    if model_type == 'classification':
        if not self.is_fitted():
            self._set_classes(y)
        target = one_hot_transform(y, n_classes=len(self.classes_))
    elif model_type == 'regression':
        if len(y.shape) == 1:
            target = y.reshape((len(y), 1))
        else:
            target = y  # multi regression: keep the matrix as-is
        if not self.is_fitted():
            self.n_targets = target.shape[1]
    else:
        raise ValueError('Wrong model type')
    ds = SupervisedDataSet(X.shape[1], target.shape[1])
    ds.setField('input', X)
    ds.setField('target', target)
    return ds
def get_dataset_txt(filename):
    """
    Creates a dataset for the neural network to use.

    input type: string representing a filename created from a numpy array;
                the last 4 columns are target variables and all other
                columns are input variables
    return type: SupervisedDataSet
    """
    array = np.loadtxt(filename)
    number_of_columns = array.shape[1]
    # Bug fix: the dataset was declared with indim = columns - 1, but the
    # input slice below provides columns - 4 features (4 target columns).
    dataset = SupervisedDataSet(number_of_columns - 4, 4)
    dataset.setField('input', array[:, :-4])
    dataset.setField('target', array[:, -4:])
    return dataset
def test_simple_predictor(self):
    """Load a pickled net and a CSV test set, activate the net over every
    row, and write the predictions to a text file."""
    output_model_file = "Train/TestData/model.pkl"
    test_file = "Train/TestData/test.csv"
    prediction_file = "Train/TestData/prediction.txt"
    # NOTE(review): the file handle passed to pickle.load is never closed.
    net = pickle.load(open(output_model_file, 'rb'))
    test = np.loadtxt(test_file, delimiter=',')
    x_test = test[:, 0:-1]  # all columns but the last are features
    y_test = test[:, -1]    # last column is the (unused) true target
    y_test = y_test.reshape(-1, 1)
    y_test_dummy = np.zeros(y_test.shape)  # placeholder targets: only activation is needed
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    # the pickled net must match the test data's dimensions
    assert(net.indim == input_size)
    assert(net.outdim == target_size)
    ds = SupervisedDataSet(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    p = net.activateOnDataset(ds)
    # mse = MSE(y_test, p)
    # rmse = sqrt(mse)
    # print "testing RMSE:{}".format(rmse)
    np.savetxt(prediction_file, p, fmt='%.6f')
def _prepare_dataset(self, X, y, model_type):
    """Build the pybrain dataset for training a classifier or regressor."""
    X, y, sample_weight = check_inputs(
        X, y, sample_weight=None, allow_none_weights=True,
        allow_multiple_targets=model_type == 'regression')
    X = self._transform_data(X, y, fit=not self.is_fitted())
    if model_type == 'classification':
        if not self.is_fitted():
            self._set_classes(y)
        # labels -> one-hot matrix, one column per known class
        target = one_hot_transform(y, n_classes=len(self.classes_))
    elif model_type == 'regression':
        target = y if len(y.shape) > 1 else y.reshape((len(y), 1))
        if not self.is_fitted():
            self.n_targets = target.shape[1]
    else:
        raise ValueError('Wrong model type')
    result = SupervisedDataSet(X.shape[1], target.shape[1])
    result.setField('input', X)
    result.setField('target', target)
    return result
def _prepare_net_and_dataset(self, X, y, model_type):
    """Build the pybrain network (stored on self.net) and a training dataset.

    :param X: data of shape [n_samples, n_features]
    :param y: targets, [n_samples] (or [n_samples, n_targets] for regression)
    :param str model_type: 'classification' or 'regression'
    :return: the populated SupervisedDataSet
    """
    X, y, sample_weight = check_inputs(X, y, sample_weight=None, allow_none_weights=True)
    self._check_init_input(self.layers, self.hiddenclass)
    X = self._transform_data(X, y, fit=True)
    # defaults: one hidden layer of 10 units, all layers sigmoid
    if self.layers is None:
        self.layers = [10]
    if self.hiddenclass is None:
        self.hiddenclass = []
        for i in range(len(self.layers)):
            self.hiddenclass.append('SigmoidLayer')
    net_options = {'bias': True, 'outputbias': True, 'peepholes': False, 'recurrent': False}
    # user-supplied params may only override the known buildNetwork options
    for key in self.params:
        if key not in net_options.keys():
            raise ValueError('Unexpected parameter ' + key)
        net_options[key] = self.params[key]
    net_options['hiddenclass'] = LAYER_CLASS[self.hiddenclass[0]]
    net_options['fast'] = False
    if model_type == 'classification':
        net_options['outclass'] = structure.SoftmaxLayer
        self._set_classes(y)
        # net shape: inputs -> first hidden layer -> one output per class
        layers_for_net = [X.shape[1], self.layers[0], len(self.classes_)]
        ds = SupervisedDataSet(X.shape[1], len(self.classes_))
        y = y.reshape((len(y), 1))
        # one-hot encode the labels for the softmax output
        label = numpy.array(OneHotEncoder(n_values=len(self.classes_)).fit_transform(y).todense())
        for i in range(0, len(y)):
            ds.addSample(tuple(X[i, :]), tuple(label[i]))
    elif model_type == 'regression':
        net_options['outclass'] = structure.LinearLayer
        if len(y.shape) == 1:
            y = y.reshape((len(y), 1))  # pybrain needs a 2-D target
        layers_for_net = [X.shape[1], self.layers[0], y.shape[1]]
        ds = SupervisedDataSet(X.shape[1], y.shape[1])
        ds.setField('input', X)
        ds.setField('target', y)
    else:
        raise ValueError('Wrong model type')
    self.net = buildNetwork(*layers_for_net, **net_options)
    # NOTE(review): extra hidden layers are added as modules without explicit
    # connections here -- confirm this wires them into the net as intended.
    for i in range(1, len(self.layers)):
        hid_layer = LAYER_CLASS[self.hiddenclass[i]](self.layers[i])
        self.net.addModule(hid_layer)
    self.net.sortModules()
    return ds
def train(self, input):
    """Run one backprop training pass on a next-step prediction task: each
    column of `input` is a pattern and the following column is its target."""
    dim = self.nm
    ds = SupervisedDataSet(dim, dim)
    # drop the last column for inputs and the first for targets,
    # so sample t is trained to predict sample t+1
    inputs = np.transpose(np.delete(input, input.shape[1] - 1, 1))
    targets = np.transpose(np.delete(input, 0, 1))
    ds.setField('input', inputs)
    ds.setField('target', targets)
    BackpropTrainer(self.network, ds).train()
def buildDataSet(timeCat, length):
    """Pull Poloniex chart data for the given look-back window, compute
    MACD / stochastic-oscillator indicators, scale everything to [0, 1],
    and pack it into a 5-input / 1-output SupervisedDataSet.

    timeCat: "days" or "hours"; `length` is expected to be negative so the
    start time lands in the past -- TODO confirm callers pass it that way.
    """
    ds = SupervisedDataSet(5,1) #initialize dataset (inputs, outputs)
    MACDvalueArray = np.array([0])
    KValueArray = np.array([0])
    priceArray = np.array([0])
    polo = poloniex(POLO_API_KEY, POLO_SECRET)
    if(timeCat == "days"):
        startTime = datetime.datetime.utcnow() + datetime.timedelta(days=length)
    elif(timeCat == "hours"):
        startTime = datetime.datetime.utcnow() + datetime.timedelta(hours=length)
    unixTime = calendar.timegm(startTime.utctimetuple())
    endTime = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
    chartData = polo.returnChartData(unixTime,endTime,300) #get all our data! start time, end time, period
    ia = np.array([0,0,0,0,0]) #heres our input array
    ta = np.array([0]) #and the output
    for i in chartData:
        #calculate our indicators (presumably these update the module-level
        #MACD_Histo / KValue values read below -- TODO confirm)
        calculateMACD(i['close'])
        calculateStchOsc(i['close'],i['high'],i['low'])
        MACDvalueArray = np.vstack((MACDvalueArray,MACD_Histo))
        KValueArray = np.vstack((KValueArray,KValue))
        priceArray = np.vstack((priceArray,i['close']))
    #delete the first one because its all 0s
    MACDvalueArray = np.delete(MACDvalueArray,0,0)
    KValueArray = np.delete(KValueArray,0,0)
    priceArray = np.delete(priceArray,0,0)
    MACD_max = max(MACDvalueArray)
    MACD_min = min(MACDvalueArray)
    K_max = max(KValueArray)
    K_min = min(KValueArray)
    price_max = max(priceArray)
    price_min = min(priceArray)
    #make a scaling function... Neural nets work better if all the input
    #values are in the same range.  Here we map to values between 0,1
    m = interp1d([MACD_min[0],MACD_max[0]],[0,1])
    k = interp1d([K_min[0],K_max[0]],[0,1])
    p = interp1d([price_min[0],price_max[0]],[0,1])
    #result = interp1d([0,1],[price_min[0],price_max[0]])
    for i in range(0,priceArray.size):
        scaledM = float(m(MACDvalueArray[i]))
        scaledK = float(k(KValueArray[i]))
        scaledP = float(p(priceArray[i]))
        #build the input and output arrays (inputs: current MACD, %K and
        #the three most recent scaled prices from the ppr queue)
        ia = np.vstack((ia,[scaledM,scaledK,ppr[0],ppr[1],ppr[2]]))
        ta = np.vstack((ta,[scaledP]))
        #this is a queue that keeps the last 3 values, appendleft for FIFO action
        ppr.appendleft(scaledP)
    np.savetxt('test1.out',ia,delimiter=',')
    #delete first 15 values because thats how long the MACD takes to get initialized to proper values
    for i in range(0,15):
        ia = np.delete(ia,0,0)
        ta = np.delete(ta,0,0)
    np.savetxt('test2.out',ia,delimiter=',') #this was just for testing, outputs all data to text file
    assert (ia.shape[0] == ta.shape[0]) #make sure input and output are same size
    ds.setField('input',ia)
    ds.setField('target',ta)
    print(str(len(ds))) #print out how many data points we have
    return ds
def initialize_dataset(regression_task, train_x, train_y):
    """Build the pybrain dataset (regression or 2-class) plus the feature count."""
    n_features = train_x.shape[1]
    if regression_task:
        dataset = SupervisedDataSet(n_features, 1)
    else:
        dataset = ClassificationDataSet(n_features, nb_classes=2,
                                        class_labels=['no success', '1st down or TD'])
    # pybrain expects the target as an [n_samples, 1] column
    dataset.setField('input', train_x)
    dataset.setField('target', train_y.reshape((len(train_y), 1)))
    return dataset, n_features
def castToRegression(self, values):
    """Converts data set into a SupervisedDataSet for regression. Classes
    are used as indices into the value array given."""
    regression_ds = SupervisedDataSet(self.indim, 1)
    # copy every field except 'target', which is rebuilt from the class indices
    for field in [f for f in self.getFieldNames() if f != 'target']:
        regression_ds.setField(field, self[field])
    regression_ds.setField('target', values[self['class'].astype(int)])
    return regression_ds
def castToRegression(self, values):
    """Convert this data set into a SupervisedDataSet for regression; the
    class labels index into `values` to produce the regression targets."""
    reg = SupervisedDataSet(self.indim, 1)
    names = self.getFieldNames()
    names.remove('target')
    for name in names:
        reg.setField(name, self[name])
    reg.setField('target', values[self['class'].astype(int)])
    return reg
def neuralNetworkRegression(X,Y):
    """Train a 100-hidden-unit pybrain regression net on (X, Y) and print the
    RMSE of its predictions over the same data."""
    print ("NEURAL NETWORK REGRESSION")
    print ("Executing...")
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size = 0.10, random_state = 5)
    Y_test = Y_test.reshape(-1,1)
    Y_train = Y_train.reshape(-1,1)
    RMSEerror = []
    # NOTE(review): train and test are stacked back together, so the model is
    # trained (and scored) on ALL the data -- the split above has no effect.
    train = np.vstack((X_train, X_test)) # append both testing and training into one array
    outputTrain = np.vstack((Y_train, Y_test))
    outputTrain = outputTrain.reshape( -1, 1 )
    inputSize = train.shape[1]
    targetSize = outputTrain.shape[1]
    ds = SupervisedDataSet(inputSize, targetSize)
    ds.setField('input', train)
    ds.setField('target', outputTrain)
    hiddenSize = 100
    epochs = 100 # got after parameter tuning
    # neural network training model
    net = buildNetwork( inputSize, hiddenSize, targetSize, bias = True )
    trainer = BackpropTrainer(net, ds)
    # uncomment out to plot epoch vs rmse
    # takes time to execute as gets best epoch value
    # getting the best value of epochs
    print ("training for {} epochs...".format( epochs ))
    '''
    for i in range(epochs):
        print (i)
        mse = trainer.train()
        rmse = mse ** 0.5
        RMSEerror.append(rmse)
    plt.plot(range(epochs), RMSEerror)
    plt.xlabel("Epochs")
    plt.ylabel("RMSE")
    plt.title("RMSE vs Epochs")
    plt.savefig("../Graphs/Network/Question 2c/RMSE vs Epochs.png")
    plt.show()
    '''
    print ("Model training in process...")
    train_mse, validation_mse = trainer.trainUntilConvergence(verbose = True, validationProportion = 0.15, maxEpochs = epochs, continueEpochs = 10)
    p = net.activateOnDataset(ds)  # predictions on the full (train+test) set
    mse = mean_squared_error(outputTrain, p)
    rmse = mse ** 0.5
    print ("Root Mean Squared Error for Best Parameters : " + str(rmse))
def _activate_on_dataset(self, X):
    """Run the fitted net over X and return the raw activations."""
    assert self.is_fitted(), "Net isn't fitted, please call 'fit' first"
    X = self._transform_data(X, fit=False)
    # pybrain insists on a target field, so feed zeros that are never used
    dummy_target = numpy.zeros((len(X), 1))
    ds = SupervisedDataSet(X.shape[1], dummy_target.shape[1])
    ds.setField('input', X)
    ds.setField('target', dummy_target)
    return self.net.activateOnDataset(ds)
def _activate_on_dataset(self, X):
    """Activate the trained network on X (targets are placeholders only)."""
    assert self.is_fitted(), "Net isn't fitted, please call 'fit' first"
    data = self._transform_data(X, fit=False)
    zeros = numpy.zeros((len(data), 1))  # placeholder targets
    ds = SupervisedDataSet(data.shape[1], zeros.shape[1])
    ds.setField('input', data)
    ds.setField('target', zeros)
    return self.net.activateOnDataset(ds)
def __get_supervised_dataset__(self, data): number_of_columns = data.shape[1] dataset = SupervisedDataSet(number_of_columns - 1, 1) input_data = data[:, :-1] print input_data.shape dataset.setField('input', input_data) # out_data = data[:, -1] out_data = out_data.reshape(out_data.size, 1) print out_data.shape dataset.setField('target', out_data) # return dataset
def initialize_dataset(regression_task, train_x, train_y):
    """Create the training dataset: a SupervisedDataSet for regression, or a
    two-class ClassificationDataSet otherwise. Returns (dataset, n_features)."""
    feature_count = train_x.shape[1]
    labels = ['no success', '1st down or TD']
    if regression_task:
        ds = SupervisedDataSet(feature_count, 1)
    else:
        ds = ClassificationDataSet(feature_count, nb_classes=2, class_labels=labels)
    ds.setField('input', train_x)
    # targets must be an [n_samples, 1] column
    ds.setField('target', train_y.reshape((len(train_y), 1)))
    return ds, feature_count
def train(self, x_train=None, y_train=None):
    """Train the network on (x_train, y_train); when both are omitted, a
    shuffled copy of the preprocessed data/labels on the instance is used."""
    if x_train is None and y_train is None:
        x_train, y_train = shuffle(self.processed_data, self.processed_labels)
    ds = SupervisedDataSet(x_train.shape[1], 1)
    assert(x_train.shape[0] == y_train.shape[0])
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # Bug fix: `hs` was only assigned when hidden_size == 0, which raised a
    # NameError for any explicit hidden size.  0 still means "match the input
    # width"; any other value is used directly.
    hs = x_train.shape[1] if self.hidden_size == 0 else self.hidden_size
    self.nn = buildNetwork(x_train.shape[1], hs, 1, bias=True,
                           hiddenclass=self.hiddenclass, outclass=self.outclass)
    trainer = BackpropTrainer(self.nn, ds, verbose=self.verbose)
    trainer.trainUntilConvergence(maxEpochs=self.maxEpochs)
def _set_dataset(self, trn_index, tst_index):
    '''Build (train, test) SupervisedDataSets from the stored descriptors.

    Features are normalized with a normalizer fitted on the TRAINING rows
    only (then applied to the test rows); targets are log-transformed.
    '''
    this_trn = self.tot_descs[trn_index]
    this_tst = self.tot_descs[tst_index]
    this_trn_target = self.tot_target[trn_index]
    this_tst_target = self.tot_target[tst_index]
    # get the normalizer (fitted on the training features only)
    trn_normalizer = self._getNormalizer(this_trn)
    # feature normal and target log for traning data
    trn_normed = self._featureNorm(this_trn, trn_normalizer)
    trn_log_tar = np.log(this_trn_target)
    # feature normalization for the test data, with the normalizer of the training data
    tst_normed = self._featureNorm(this_tst, trn_normalizer)
    tst_log_tar = np.log(this_tst_target)
    trn_ds_ann = SupervisedDataSet(self.indim, self.outdim)
    trn_ds_ann.setField('input', trn_normed)
    trn_log_tar = trn_log_tar.reshape((trn_log_tar.shape[0],1))  # column vector for pybrain
    trn_ds_ann.setField('target', trn_log_tar)
    tst_ds_ann = SupervisedDataSet(self.indim, self.outdim)
    tst_ds_ann.setField('input', tst_normed)
    tst_log_tar = tst_log_tar.reshape((tst_log_tar.shape[0],1))
    tst_ds_ann.setField('target', tst_log_tar)
    return trn_ds_ann, tst_ds_ann
def main():
    """Train a 601-way classifier in batches of 10k rows read from
    params_train.txt, then pickle the trained net into the model folder."""
    args = parser.parse_args()
    hidden_size = 50
    epochs = 5
    dataset_len = _params_count(args.model_folder, 'params_train.txt')
    rows_per_step = 10000
    total_batches = dataset_len // rows_per_step
    params_len = _params_count(args.model_folder, 'dict.txt')  # input width = dictionary size
    output_layer_num = 601
    net = _init_net(params_len, output_layer_num, hidden_size)
    # NOTE(review): range(total_batches - 1) skips the final batch -- confirm intended.
    for batch_num in range(total_batches - 1):
        trainParams = _build_params(os.path.join(args.model_folder, 'params_train.txt'), args.model_folder, batch_num, rows_per_step)
        print('params ready')
        # one-hot encode the labels (label values are 1-based)
        y = []
        for y_val in trainParams['y']:
            y_vec = [0] * output_layer_num
            y_vec[y_val - 1] = 1
            y.append(y_vec)
        print(len(trainParams['x']))
        print(len(y))
        # TODO: fix the number of pictures
        ds = SupervisedDataSet(params_len, output_layer_num)
        ds.setField('input', trainParams['x'])
        ds.setField('target', y)
        trainer = BackpropTrainer(net, ds)
        print("training for {} epochs...".format(epochs))
        #trainer.trainUntilConvergence(verbose=True)
        for i in range(epochs):
            mse = trainer.train()
            rmse = sqrt(mse)
            print("training RMSE, epoch {}: {}".format(i + 1, rmse))
    pickle.dump(net, open(os.path.join(args.model_folder, 'model_nn.pkl'), 'wb'))
def main():
    """Batch-train the 601-class net over params_train.txt and pickle the result."""
    args = parser.parse_args()
    hidden_size = 50
    epochs = 5
    dataset_len = _params_count(args.model_folder, 'params_train.txt')
    rows_per_step = 10000
    total_batches = dataset_len // rows_per_step
    params_len = _params_count(args.model_folder, 'dict.txt')
    output_layer_num = 601
    net = _init_net(params_len, output_layer_num, hidden_size)
    for batch_num in range(total_batches - 1):
        trainParams = _build_params(
            os.path.join(args.model_folder, 'params_train.txt'),
            args.model_folder, batch_num, rows_per_step)
        print('params ready')
        # one-hot encode the 1-based labels
        y = []
        for y_val in trainParams['y']:
            row = [0] * output_layer_num
            row[y_val - 1] = 1
            y.append(row)
        print(len(trainParams['x']))
        print(len(y))
        # TODO: fix the number of pictures
        ds = SupervisedDataSet(params_len, output_layer_num)
        ds.setField('input', trainParams['x'])
        ds.setField('target', y)
        trainer = BackpropTrainer(net, ds)
        print("training for {} epochs...".format(epochs))
        #trainer.trainUntilConvergence(verbose=True)
        for epoch in range(epochs):
            epoch_rmse = sqrt(trainer.train())
            print("training RMSE, epoch {}: {}".format(epoch + 1, epoch_rmse))
    pickle.dump(net, open(os.path.join(args.model_folder, 'model_nn.pkl'), 'wb'))
def train_with_shuffle(inp, targ, nn, epoch):
    """Train `nn` on (inp, targ): first until convergence, then `epoch` extra
    single-epoch passes, printing the RMSE of each; returns the net."""
    # NOTE(review): the dataset is sized with len(inp)/len(targ), i.e. the
    # number of rows rather than per-sample widths -- confirm inp/targ shapes.
    ds = SupervisedDataSet(len(inp), len(targ))
    ds.setField('input', inp)
    ds.setField('target', targ)
    trainer = BackpropTrainer(nn, ds)
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=100, continueEpochs=10)
    for i in range(epoch):
        mse = trainer.train()
        rmse = np.sqrt(mse)
        # ("RSME" typo kept verbatim -- it is runtime output)
        print "training RSME, epoch {}: {}".format(i+1, rmse)
    return nn
def _prepare_dataset(self, x_data, y_data):
    """Package (x_data, y_data) into a SupervisedDataSet matching the net's dims."""
    assert x_data.shape[0] == y_data.shape[0]
    # 1-D targets become a column matrix; otherwise use the frame's values
    y_matrix = np.matrix(y_data).T if len(y_data.shape) == 1 else y_data.values
    assert x_data.shape[1] == self.net.indim
    assert y_matrix.shape[1] == self.net.outdim
    dataset = SupervisedDataSet(self.net.indim, self.net.outdim)
    dataset.setField("input", x_data)
    dataset.setField("target", y_matrix)
    return dataset
def xtest_simple_trainer(self):
    """Train a 100-hidden-unit regression net on the train+validation CSVs
    until convergence and pickle the resulting network.

    (The 'x' prefix makes the test runner skip this by default.)
    """
    # http://fastml.com/pybrain-a-simple-neural-networks-library-in-python/
    # https://github.com/zygmuntz/pybrain-practice/blob/master/kin_train.py
    train_file = "Train/TestData/train.csv"
    validation_file = "Train/TestData/validation.csv"
    output_model_file = "Train/TestData/model.pkl"
    train = np.loadtxt(train_file, delimiter=',')
    validation = np.loadtxt(validation_file, delimiter=',')
    # merge both files; trainUntilConvergence does its own validation split
    train = np.vstack((train, validation))
    x_train = train[:,0:-1]  # all but the last column are features
    y_train = train[:,-1]    # last column is the target
    y_train = y_train.reshape(-1, 1)
    input_size = x_train.shape[1]
    num_inputs = x_train.shape[0]
    target_size = y_train.shape[1]
    hidden_size = 100
    epochs = 50
    continue_epochs = 10
    validation_proportion = 0.15
    print("x_size {0}, {1}, y_size {2}".format(input_size, num_inputs, target_size))
    ds = SupervisedDataSet(input_size, target_size)
    ds.setField( 'input', x_train)
    ds.setField( 'target', y_train)
    network = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(network, ds, learningrate=0.05, lrdecay=0.99999)
    # for i in range(epochs):
    #     mse = trainer.train()
    #     rmse = sqrt(mse)
    #     print "training RMSE, epoch {}: {}".format(i+1, rmse)
    train_mse, validation_mse = trainer.trainUntilConvergence( verbose = True, validationProportion = validation_proportion, maxEpochs = epochs, continueEpochs = continue_epochs )
    train_rmse = [sqrt(x) for x in train_mse]
    print "training RMSE {}".format(train_rmse)
    pickle.dump(network, open( output_model_file, 'wb'))
def train_with_shuffle(inp, targ, nn, epoch):
    """Fit `nn` on (inp, targ): one trainUntilConvergence run followed by
    `epoch` extra single-epoch passes, printing each epoch's RMSE."""
    # NOTE(review): dataset dims are len(inp)/len(targ), i.e. row counts
    # rather than per-sample widths -- verify the expected input shapes.
    ds = SupervisedDataSet(len(inp), len(targ))
    ds.setField('input', inp)
    ds.setField('target', targ)
    trainer = BackpropTrainer(nn, ds)
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=100, continueEpochs=10)
    for i in range(epoch):
        mse = trainer.train()
        rmse = np.sqrt(mse)
        # ("RSME" typo kept verbatim -- it is runtime output)
        print "training RSME, epoch {}: {}".format(i + 1, rmse)
    return nn
def predict(self, X):
    """
    Predict values for all events in dataset.

    :param X: pandas.DataFrame of shape [n_samples, n_features]
    :rtype: numpy.array of shape [n_samples] with predicted values
    """
    assert self._is_fitted(), "regressor isn't fitted, please call 'fit' first"
    data = self._transform_data(X, fit=False)
    placeholder = numpy.zeros((len(data), 1))  # targets are required but ignored
    ds = SupervisedDataSet(data.shape[1], placeholder.shape[1])
    ds.setField('input', data)
    ds.setField('target', placeholder)
    return self.net.activateOnDataset(ds)
def _activate_on_dataset(self, X):
    """
    Predict data.

    :param pandas.DataFrame X: data to be predicted
    :return: array-like predictions [n_samples, n_targets]
    """
    assert self._is_fitted(), "Net isn't fitted, please call 'fit' first"
    transformed = self._transform_data(X, fit=False)
    # pybrain requires a target field even for pure prediction
    dummy = numpy.zeros((len(transformed), 1))
    ds = SupervisedDataSet(transformed.shape[1], dummy.shape[1])
    ds.setField('input', transformed)
    ds.setField('target', dummy)
    return self.net.activateOnDataset(ds)
def _activate_on_dataset(self, X):
    """
    Predict data.

    :param pandas.DataFrame X: data to be predicted
    :return: array-like predictions [n_samples, n_targets]
    """
    assert self._is_fitted(), "Net isn't fitted, please call 'fit' first"
    features = self._transform_data(X, fit=False)
    targets_placeholder = numpy.zeros((len(features), 1))
    dataset = SupervisedDataSet(features.shape[1], targets_placeholder.shape[1])
    dataset.setField('input', features)
    dataset.setField('target', targets_placeholder)
    return self.net.activateOnDataset(dataset)
def cross_vaildate(self):
    """Run 5-fold cross validation over self.ds with deep copies of the
    configured trainer and return the mean performance across folds.

    NOTE(review): the method name has a typo ("vaildate") -- kept here to
    avoid breaking existing callers.
    """
    n_folds = 5
    max_epochs = self.num_epochs  # falsy => a single trainer.train() call per fold
    l = self.ds.getLength()
    inp = self.ds.getField("input")
    tar = self.ds.getField("target")
    indim = self.ds.indim
    outdim = self.ds.outdim
    assert l > n_folds
    perms = array_split(permutation(l), n_folds)  # shuffled fold index sets
    perf = 0.
    for i in range(n_folds):
        # determine train indices (all folds except the i-th)
        train_perms_idxs = list(range(n_folds))
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[ train_perms_idx ])
        train_idxs = concatenate(temp_list)
        # determine test indices
        test_idxs = perms[i]
        # train
        train_ds = SupervisedDataSet(indim, outdim)
        train_ds.setField("input" , inp[train_idxs])
        train_ds.setField("target" , tar[train_idxs])
        # deep copy so each fold starts from the same initial trainer/net state
        temp_trainer = copy.deepcopy(self.trainer)
        temp_trainer.setData(train_ds)
        if not max_epochs:
            temp_trainer.train()
        else:
            temp_trainer.trainEpochs(max_epochs)
        # test
        test_ds = SupervisedDataSet(indim, outdim)
        test_ds.setField("input" , inp[test_idxs])
        test_ds.setField("target" , tar[test_idxs])
        perf += self.myCalculatePerformance(temp_trainer, test_ds)
    perf /= n_folds
    return perf
def test(self): x, y = shuffle(self.processed_data, self.processed_labels, random_state=42) x_train, x_test = x[:int(0.9*len(x))], x[int(0.9*len(x)):] y_train, y_test = y[:int(0.9*len(y))], y[int(0.9*len(y)):] self.train(x_train, y_train) test_ds = SupervisedDataSet(x_test.shape[1], 1) test_ds.setField('input', x_test) test_ds.setField('target', y_test) preds = self.nn.activateOnDataset(test_ds) counter = 0 success = 0 for i in range(0, len(preds)-1, 2): counter += 1 if (preds[i][0] > preds[i+1][0] and y_test[i][0] > y_test[i+1][0]) or (preds[i][0] < preds[i+1][0] and y_test[i][0] < y_test[i+1][0]): success += 1 print 'Accuracy on test set:', float(success) / counter
def predict(self, X):
    """
    Predict values for all events in dataset.

    :param X: pandas.DataFrame of shape [n_samples, n_features]
    :rtype: numpy.array of shape [n_samples] with predicted values
    """
    assert self._is_fitted(), "regressor isn't fitted, please call 'fit' first"
    features = self._transform_data(X, fit=False)
    dummy_targets = numpy.zeros((len(features), 1))  # required but unused
    dataset = SupervisedDataSet(features.shape[1], dummy_targets.shape[1])
    dataset.setField('input', features)
    dataset.setField('target', dummy_targets)
    return self.net.activateOnDataset(dataset)
def train_network(train, target):
    """
    Trains via linear regression from the target and train data

    Arguments:
        train : an array of training data
        target: an array of associated target data
    Returns:
        The trained model
    """
    n_features = len(train[0])
    n_targets = len(target[0])
    print("Setting up Neural Network data with %s train features and %s targets sets" % (n_features, n_targets))
    data = SupervisedDataSet(n_features, n_targets)
    data.setField('input', train)
    data.setField('target', target)
    regressor = NNregression(data)
    regressor.setupNN()
    print("Training Neural Network on %s training sets" % len(data))
    regressor.runTraining()
    return regressor.Trainer.module
def NN_analysis(features, labels):
    """Train a feedforward net on per-day feature vectors (each summed with
    the previous day's vector) and report sign-match accuracy on the
    training data.

    features: mapping "YYYY-MM-DD" -> feature vector (assumed; TODO confirm)
    labels:   mapping "YYYY-MM-DD" -> numeric label whose sign is predicted
    """
    #training svm
    print('start to train neural network')
    print('get dates')
    dates = list(labels.keys())
    dates = [datetime.datetime.strptime(ts, "%Y-%m-%d") for ts in dates]
    dates.sort()  # chronological order
    print(dates)
    x = []
    y = []
    print('make vectors')
    for date in dates[1:62]:  # skip day 0 (needs a previous day); cap at 61 samples
        time_stamp = datetime.datetime.strftime(date, "%Y-%m-%d")
        feature = np.array(features[time_stamp])
        for i in range(1, 2):  # add the previous calendar day's feature vector
            temp = date - datetime.timedelta(days=i)
            a = np.array(features[datetime.datetime.strftime(temp, "%Y-%m-%d")])
            feature = np.add(feature, a)
        #print(list(feature))
        x.append(feature)
        y.append(labels[time_stamp])
    print('NN training starts')
    x = np.array(x)
    #x.reshape(-1,1)
    ds = SupervisedDataSet(len(x[0]), 1)
    ds.setField('input', x)
    y = np.array(y).reshape(-1, 1)  # pybrain expects a column target
    ds.setField('target', y)
    net = buildNetwork(len(x[0]), 500, 1, bias=True)  # 500 hidden units
    trainer = BackpropTrainer(net, ds)
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=500, continueEpochs=10)
    print("fit finished")
    #training_indices=np.random.choice(len(dates), len(dates)/10)
    count = 0
    for i in range(0, len(x)):
        print("predict={0},actual={1}".format(net.activate(x[i]), y[i]))
        if net.activate(x[i]) * y[i] > 0:  # same sign => counted correct
            count += 1
    # NOTE(review): accuracy divides by len(labels), not len(x) -- confirm intended.
    print("accuracy={0}".format(float(count) / len(labels)))
def train(self, train_dir, test_dir, dataset_path=None, dump_dataset=True):
    """Build (or load) the training dataset from train_dir, min-max normalize
    the inputs, optionally dump the dataset, build a test set from test_dir,
    and hand everything to self.train_method.

    dataset_path: optional path to a previously dumped dataset (skips the walk).
    dump_dataset: when True, save the assembled dataset to "dataset.xml".
    """
    testset = SupervisedDataSet(len(self.summarizer.get_features()), 1)
    # per-feature [min, max] trackers, initialized to an impossible range
    min_maxs = [[100, 0] for i in range(len(self.summarizer.get_features()))]
    if dataset_path and dataset_path != 'None':
        dataset = load_from_file(dataset_path)
        min_maxs = load_from_file("meta_model.xml")  # check the path!
    else:
        dataset = SupervisedDataSet(len(self.summarizer.get_features()), 1)
        for root, dirs, files in os.walk(train_dir, topdown=False):
            for file_ds in self.process_dir(self.summarizer, root, files):
                for ds in file_ds:
                    dataset.addSample(ds[0], ds[1])
                    min_maxs = self.update_min_maxs(min_maxs, ds[0])
        # break # remove this !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # print min_maxs
    # rescale every input feature to [0, 1] using the observed min/max
    inp = []
    for d in dataset['input']:
        inp.append([normalize(val, min_maxs[i][0], min_maxs[i][1]) for i, val in enumerate(d)])
    dataset.setField("input", inp)
    # print dataset['input'] ### TEMP
    # save_dataset_as_csv(dataset)
    if dump_dataset:
        save_to_file(dataset, "dataset.xml")
    if test_dir:
        for root, dirs, files in os.walk(test_dir, topdown=False):
            for file_ds in self.process_dir(self.summarizer, root, files):
                for ds in file_ds:
                    testset.addSample(ds[0], ds[1])
    print "[Trainer] -> training..."
    save_to_file(min_maxs, self.features.replace("features.txt", "meta_model.xml"))
    self.train_method(self.summarizer, dataset, testset, self.features.replace("features.txt", "model.xml"))
def train(self, input_vectors, expected_output_vectors, params = dict()):
    '''
    Trains the network by solving for its weights
    input_vectors: 2-d array of reals
    expected_output_vectors: 2-d array of reals
    '''
    n_in = input_vectors.shape[1]
    n_out = expected_output_vectors.shape[1]
    network = self._constructNetwork(n_in, n_out, params)
    dataset = SupervisedDataSet(n_in, n_out)
    dataset.setField('input', input_vectors)
    dataset.setField('target', expected_output_vectors)
    # fit until the validation error stops improving, then keep the net
    BackpropTrainer(network, dataset).trainUntilConvergence()
    self.ann = network
def test(trained):
    """
    Builds a new test dataset and tests the trained network on it.
    """
    samples = []
    targets = []
    for center in range(3, 10):
        draw = normalvariate(center, 1.6)
        samples.append([draw, draw + 1])
        targets.append([4 * draw ** 2])
    expanded = PolynomialFeatures(2).fit_transform(samples)
    testdata = SupervisedDataSet(5, 1)
    testdata.setField("input", expanded)
    testdata.setField("target", targets)
    trained.testOnData(testdata, verbose=True)
def make_dataset():
    """
    Creates a set of training data.
    """
    raw_inputs = []
    raw_targets = []
    for center in range(0, 100):
        draw = normalvariate(center, 1.6)
        raw_inputs.append([draw, draw + 1])
        raw_targets.append([4 * draw ** 2])
    expanded = PolynomialFeatures(2).fit_transform(raw_inputs)
    data = SupervisedDataSet(5, 1)
    data.setField("input", expanded)
    data.setField("target", raw_targets)
    return data
def predict(in_dir, model_file, nets):
    """For each (name, tables) pair in `nets`, run the pickled model over the
    corresponding test table and write all predictions into one CSV.

    :param in_dir: root directory containing per-net subdirectories
    :param model_file: path to the pickled pybrain network
    :param nets: iterable of (net_name, [table_name, ...]) tuples
    """
    print('predict...')
    # Bug fix: the model was opened in text mode ('r') and its contents passed
    # to pickle.loads, which requires bytes on Python 3; the handle was also
    # never closed, and the same model was reloaded on every loop iteration.
    # Load it once, in binary mode, via a context manager.
    with open(model_file, 'rb') as f:
        net2 = pickle.load(f)
    res = np.array([])
    for t in nets:
        k = t[0]
        v = t[1]
        print(k + ': predicting at ' + str(datetime.now()))
        out_dir = in_dir + k + '/out/'
        # read tables
        dft = pd.read_table(out_dir + '/' + v[0] + '.test.csv', sep=',', index_col=[0,1])
        cols = _cols
        X = dft[cols]
        y = np.zeros(len(X))  # placeholder targets; only activation is needed
        ds = SupervisedDataSet(X.shape[1], 1)
        ds.setField('input', X)
        ds.setField('target', y.reshape(-1,1))
        preds = net2.activateOnDataset(ds)
        probs = preds
        # key = "<net>_<i>_<j>" built from the i/j index columns
        X['key'] = dft[['i','j']].apply(lambda x: k + '_' + str((int)(x['i'])) + '_' + str((int)(x['j'])), axis=1)
        X['pred'] = probs
        if len(res) == 0:
            res = X[['key', 'pred']].values
        else:
            res = np.concatenate([res, X[['key', 'pred']].values])
        print(k + ': done predicting; k size=' + str(len(X)) + ' | res size=' + str(len(res)))
    print('writing final output')
    df = pd.DataFrame(res, columns=['NET_neuronI_neuronJ','Strength'])
    df.to_csv(in_dir + '/out/predictions.csv', index=False)
    print('done; num rows=' + str(len(df)))
def train_CV(self,n_folds=5,num_neuron = 50,learning_rate_input=0.01,decay=0.01,maxEpochs_input=1200,verbose_input=True): '''call the class in model validators''' '''and do cross validation''' '''pass values''' dataset = self.data_set l = dataset.getLength() indim = dataset.indim outdim = dataset.outdim inp = dataset.getField("input") out = dataset.getField("target") perms = np.array_split(permutation(l), n_folds) perf = 0 for i in range(n_folds): train_perms_idxs = list(range(n_folds)) train_perms_idxs.pop(i) temp_list = [] for train_perms_idx in train_perms_idxs: temp_list.append(perms[ train_perms_idx ]) train_idxs = np.concatenate(temp_list) #this is the test set: test_idxs = perms[i] #train: print "Training on part: ", i train_ds = SupervisedDataSet(indim,outdim) train_ds.setField("input", inp[train_idxs]) train_ds.setField("target",out[train_idxs]) net_this = buildNetwork(indim,num_neuron,outdim,bias=True,hiddenclass = SigmoidLayer) t_this = BackpropTrainer(net_this,train_ds,learningrate = learning_rate_input,weightdecay=decay, momentum=0.,verbose=verbose_input) #train asked times: t_this.trainEpochs(maxEpochs_input) #test on testset. test_ds = SupervisedDataSet(indim,outdim) test_ds.setField("input", inp[test_idxs]) test_ds.setField("target",out[test_idxs]) perf_this = self._net_performance(net_this, test_ds) perf = perf + perf_this perf /=n_folds print perf return perf
def NN_analysis(features,labels):
    """Train a 500-hidden-unit network on two-day summed feature vectors
    and report in-sample sign-agreement accuracy.

    :param features: dict mapping "YYYY-MM-DD" strings to feature vectors
    :param labels: dict mapping "YYYY-MM-DD" strings to scalar targets
    """
    #training svm
    print('start to train neural network')
    print('get dates')
    dates = list(labels.keys())
    dates = [datetime.datetime.strptime(ts, "%Y-%m-%d") for ts in dates]
    dates.sort()
    print(dates)
    x = []
    y = []
    print('make vectors')
    # Only dates[1:62] are used so every date has a previous day available.
    for date in dates[1:62]:
        time_stamp = datetime.datetime.strftime(date, "%Y-%m-%d")
        feature = np.array(features[time_stamp])
        # Sum in the previous day's feature vector.
        for i in range(1, 2):
            temp = date - datetime.timedelta(days=i)
            a = np.array(features[datetime.datetime.strftime(temp, "%Y-%m-%d")])
            feature = np.add(feature, a)
        x.append(feature)
        y.append(labels[time_stamp])
    print('NN training starts')
    x = np.array(x)
    ds = SupervisedDataSet(len(x[0]), 1)
    ds.setField('input', x)
    y = np.array(y).reshape(-1, 1)
    ds.setField('target', y)
    net = buildNetwork(len(x[0]), 500, 1, bias=True)
    trainer = BackpropTrainer(net, ds)
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=500, continueEpochs=10)
    print("fit finished")
    count = 0
    for i in range(0, len(x)):
        print("predict={0},actual={1}".format(net.activate(x[i]), y[i]))
        if net.activate(x[i]) * y[i] > 0:
            count += 1
    # BUG FIX: accuracy must divide by the number of samples actually
    # evaluated (len(x), the 61 dates above), not by len(labels), which
    # covers ALL dates and understates the accuracy.
    print("accuracy={0}".format(float(count) / len(x)))
def get_nn_dom_prediction(train_data, train_truth, test_data, test_truth,
                          hidden=(5,), weight_decay=0.0):
    """Standardise the targets, fit a dominance-encoded network, and return
    de-standardised predictions for the test set."""
    # Recode each matrix so genotypes are split into per-allele columns
    # (captures dominance effects); train first, then test.
    train_data = _convert_to_individual_alleles(train_data)
    test_data = _convert_to_individual_alleles(test_data)
    mean = np.mean(train_truth)
    sd = np.std(train_truth)
    # Training set with z-scored targets.
    ds = SupervisedDataSet(train_data.shape[1], 1)
    ds.setField('input', train_data)
    ds.setField('target', (train_truth[:, np.newaxis] - mean) / sd)
    net = _get_nn(train_data.shape[1], hidden)
    _train_nn(net, ds, weight_decay)
    # Inputs-only dataset for prediction.
    test_ds = UnsupervisedDataSet(test_data.shape[1])
    test_ds.setField('sample', test_data)
    # Undo the target standardisation on the way out.
    predicted = net.activateOnDataset(test_ds) * sd + mean
    return predicted.ravel()
def _prepare_dataset(self, X, y, model_type):
    """
    Prepare data in pybrain format.

    :param pandas.DataFrame X: data of shape [n_samples, n_features]
    :param y: values for samples --- array-like of shape [n_samples]
    :param str model_type: classification or regression label
    :return: pybrain SupervisedDataSet holding transformed inputs and targets
        (the original docstring said "self", which the code does not return)
    :raises ValueError: if model_type is neither 'classification' nor
        'regression'
    """
    # Multiple target columns are only permitted for regression.
    X, y, sample_weight = check_inputs(
        X, y, sample_weight=None, allow_none_weights=True,
        allow_multiple_targets=model_type == 'regression')
    # Fit the transformer only on the first call; reuse it afterwards.
    X = self._transform_data(X, y, fit=not self._is_fitted())
    if model_type == 'classification':
        if not self._is_fitted():
            self._set_classes(y)
        # One-hot targets: one output unit per class.
        target = one_hot_transform(y, n_classes=len(self.classes_))
    elif model_type == 'regression':
        if len(y.shape) == 1:
            # Single-target regression: make y a column vector.
            target = y.reshape((len(y), 1))
        else:
            # multi regression
            target = y
        if not self._is_fitted():
            self.n_targets = target.shape[1]
    else:
        raise ValueError('Wrong model type')
    dataset = SupervisedDataSet(X.shape[1], target.shape[1])
    dataset.setField('input', X)
    dataset.setField('target', target)
    return dataset
def neuralNetworkAlgorithm(datadfTraining, datadfTesting):
    """Train a small feed-forward net to predict OriginalInterestRate and
    print the training and testing RMSE.

    :param datadfTraining: pandas DataFrame with an OriginalInterestRate column
    :param datadfTesting: pandas DataFrame with an OriginalInterestRate column
    NOTE: both frames are mutated in place — the target column is dropped
    (preserved from the original; callers may rely on it).
    """
    print("Executing Neural Network algorithm")
    # FIX: Series.reshape was removed from pandas; reshape the underlying
    # ndarray via .values instead (same values, same (n, 1) shape).
    label = datadfTraining.OriginalInterestRate.values.reshape(-1, 1)
    datadfTraining.drop('OriginalInterestRate', axis=1, inplace=True)
    features = datadfTraining
    labelTesting = datadfTesting.OriginalInterestRate.values.reshape(-1, 1)
    datadfTesting.drop('OriginalInterestRate', axis=1, inplace=True)
    featuresTesting = datadfTesting
    print("Training Data")
    hidden_size = 3
    epochs = 2
    input_size = features.shape[1]
    target_size = label.shape[1]
    ds = SupervisedDataSet(input_size, target_size)
    ds.setField('input', features)
    ds.setField('target', label)
    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(net, ds)
    print("Training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = math.sqrt(mse)
        print("Training RMSE, epoch {}: {}".format(i + 1, rmse))
    # Evaluate on the held-out frame.
    input_size = featuresTesting.shape[1]
    target_size = labelTesting.shape[1]
    ds = SupervisedDataSet(input_size, target_size)
    ds.setField('input', featuresTesting)
    ds.setField('target', labelTesting)
    p = net.activateOnDataset(ds)
    mse = mean_squared_error(labelTesting, p)
    rmse = math.sqrt(mse)
    print("Testing rmse:" + str(rmse))
def test_neural_nets(ds): def plot_errors(x, train_err, test_err): plt.plot(x, train_err, label='Training error') plt.xlabel('Epochs') plt.ylabel('Error') plt.title('Training error using backpropagation') plt.legend() plt.show() input_size = len(ds.train.x[0]) # no. of attributes target_size = 1 hidden_size = 5 iterations = 1000 n = FeedForwardNetwork() in_layer = LinearLayer(34) hidden_layer = [SigmoidLayer(20), SigmoidLayer(20), SigmoidLayer(20)] out_layer = LinearLayer(1) n.addInputModule(in_layer) for layer in hidden_layer: n.addModule(layer) n.addOutputModule(out_layer) in_to_hidden = FullConnection(in_layer, hidden_layer[0]) h1 = FullConnection(hidden_layer[0], hidden_layer[1]) h2 = FullConnection(hidden_layer[1], hidden_layer[2]) hidden_to_out = FullConnection(hidden_layer[2], out_layer) n.addConnection(in_to_hidden) n.addConnection(h1) n.addConnection(h2) n.addConnection(hidden_to_out) n.sortModules() print n train_nnds = SupervisedDataSet(input_size, target_size) train_nnds.setField('input', ds.train.x) one_train_reshaped = np.array(ds.train.y).reshape(-1,1) train_nnds.setField('target', one_train_reshaped) trainer = BackpropTrainer( n, train_nnds ) epochs, train_acc, test_acc = [], [], [] for i in xrange(iterations): trainer.train() train_pred_y = [] # Compute percent training error for row in ds.train.x: p = int( round( n.activate(row)[0] ) ) if p >= 1: p = 1 else: p = 0 # sometimes rounding takes us to 2 or -1 train_pred_y.append(p) train_error = percentError(train_pred_y, ds.train.y) if i%25 == 0 or i==iterations-1: epochs.append(i) train_acc.append(train_error) print "Train error", train_error plot_errors(epochs, train_acc, test_acc)
net.addConnection(theta2) # sort module net.sortModules() # create a dataset object, make output Y a softmax matrix allData = SupervisedDataSet(n, numLabels) Y2 = convertToOneOfMany(Y) # add data samples to dataset object, both ways are correct '''for i in range(m): inData = X[i,:] outData = Y2[i, :] allData.addSample(inData, outData) ''' allData.setField('input', X) allData.setField('target', Y2) #separate training and testing data dataTrain, dataTest = allData.splitWithProportion(.9) # create object for training train = BackpropTrainer(net, dataset=dataTrain, learningrate=0.03, momentum=0.3) #train.trainUntilConvergence(dataset=dataTrain) # evaluate correct output for trainer trueTrain = dataTrain['target'].argmax(axis=1) trueTest = dataTest['target'].argmax(axis=1)
# coding: utf-8
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
import pandas as pd
import json

# 24 input attributes, 1 target — matches a 25-column sheet where the
# last column (index 24) is the label.
ds = SupervisedDataSet(24, 1)
tf = pd.read_excel("G1.xlsx")
# FIX: the dataset was declared with 24 inputs but the slice only took 23
# columns (`:23` stops at index 22); take the first 24 columns instead.
# TODO(review): confirm the split against the actual spreadsheet layout.
dados = tf.iloc[:, :24]
res = tf.iloc[:, 24]
# FIX: the input field was commented out, leaving the dataset with targets
# but no inputs; populate both fields.
ds.setField('input', dados)
ds.setField('target', res)
# X, cv_x, Y, cv_y = train_test_split(train_data, train_label, test_size=0.30, random_state=85) X = train_data Y = train_label print("building dataset") # build model input_size = X.shape[1] target_size = 1 # supervised dataset ds = SupervisedDataSet(input_size, target_size) Y = Y.reshape(-1, 1) ds.setField('input', X) ds.setField('target', Y) print("building network") # build network hidden_size = 30 epochs = 200 continue_epochs = 10 validation_proportion = 0.20 nn = buildNetwork(input_size, hidden_size, target_size, bias=True) bp = BackpropTrainer(nn, ds) print("training...") # train with cv # bp.trainUntilConvergence(verbose=True, validationProportion=0.20, maxEpochs=1000, continueEpochs=10)
# NOTE(review): fragment — X_scaler, Y_scaler, X_train, Y_train, X_test and
# Y_test are defined earlier in the file.
Xscaler = X_scaler.transform(X_train)
Yscaler = Y_scaler.transform(Y_train)
# NOTE(review): the test split is standardised with scalers fit on the test
# data itself rather than the training fit — confirm this is intended.
X_scalerTe = preprocessing.StandardScaler().fit(X_test)
Y_scalerTe = preprocessing.StandardScaler().fit(Y_test)
XscalerTe = X_scalerTe.transform(X_test)
YscalerTe = Y_scalerTe.transform(Y_test)
#train X and y
Xscaler = pd.DataFrame(Xscaler)
Yscaler = pd.DataFrame(Yscaler)
#test X and Y
XscalerTe = pd.DataFrame(XscalerTe)
YscalerTe = pd.DataFrame(YscalerTe)
net = buildNetwork(10, 50, 1, outclass=SigmoidLayer)
ds = SupervisedDataSet(10, 1)
ds.setField('input', Xscaler)
# NOTE(review): the target is the UNscaled Y_train even though Yscaler was
# computed above — suspicious given the sigmoid output layer; confirm.
ds.setField('target', Y_train)
trainer = BackpropTrainer(net, ds)
# Ten epochs of plain backprop, printing the epoch index.
for i in range(10):
    print(i)
    trainer.train()
#New data for testing:
# Flatten each scaled test-feature column into a 1-d array.
X_test0 = np.ravel(XscalerTe.iloc[0:, 0:1])
X_test1 = np.ravel(XscalerTe.iloc[0:, 1:2])
X_test2 = np.ravel(XscalerTe.iloc[0:, 2:3])
X_test3 = np.ravel(XscalerTe.iloc[0:, 3:4])
X_test4 = np.ravel(XscalerTe.iloc[0:, 4:5])
X_test5 = np.ravel(XscalerTe.iloc[0:, 5:6])
X_test6 = np.ravel(XscalerTe.iloc[0:, 6:7])
# NOTE(review): fragment — plotdir, iter, myplot, NetworkReader, pylab etc.
# are defined elsewhere in the file.
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)
# set-up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()
#create the test DataSet: 101 evenly spaced points on [0, 1], sine target
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))
#read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    # Re-load a previously saved network and score it on the test set.
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    #calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
    tserr = v.MSE(ctsts, tsts['target'])
    print 'MSE error on TSTS:', tserr
    myplot(trndata, tsts=tsts, ctsts=ctsts)
    pylab.show()
# NOTE(review): fragment — X1, X2 and ntrain are defined earlier in the
# file; the chunk is cut off mid-statement at the final `else:`.
Y = []
# Label: 0 when both coordinates lie in the same half of the unit square
# (both > 0.5 or both <= 0.5), otherwise 1 — an XOR-style target.
for i in range(ntrain):
    if (X1[i] > 0.5 and X2[i] > 0.5) or (X1[i] <= 0.5 and X2[i] <= 0.5):
        Y.append(0)
    else:
        Y.append(1)
dataset = pd.DataFrame(zip(X1, X2, Y), columns=["X1", "X2", "Y"])
X_train = dataset[["X1", "X2"]]
Y_train = dataset[["Y"]]
'''Arquitectura de la red neuronal'''
net = buildNetwork(2, 4, 1)  #### neurons in the input layer, hidden layer, and output layer
ds = SupervisedDataSet(2, 1)
ds.setField('input', X_train)
ds.setField('target', Y_train)
# Scatter the training points coloured by their label.
plt.scatter(ds['input'][:, 0], ds['input'][:, 1], c=ds['target'], linewidths=0)
'''Entreno mi red neuronal'''
trainer = BackpropTrainer(net, ds)
trainer.trainEpochs(500)
'''Creo Datos de prueba'''
# Fresh uniform test sample (Python 2: map returns a list, indexed below).
ntest = 1000
X1test = map(random.uniform, [0] * ntest, [1] * ntest)
X2test = map(random.uniform, [0] * ntest, [1] * ntest)
Ytest = []
for i in range(ntest):
    if (X1test[i] > 0.5 and X2test[i] > 0.5) or (X1test[i] <= 0.5 and X2test[i] <= 0.5):
        Ytest.append(0)
    else:
import pybrain
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
import pandas as pd
import numpy as np
import csv

# Load the wine table and show what we got.
df = pd.read_csv("winecsv.csv")
print(df)

# 4 inputs -> 2 hidden units -> 1 output.
net = buildNetwork(4, 2, 1)

# Dataset: four acidity/sulphur-dioxide features, quality as the target.
ds = SupervisedDataSet(4, 1)
feature_cols = ['fx.acidity', 'vol.acidity', 'citacid', 'totsuldiox']
ds.setField('input', df[feature_cols])
ds.setField('target', df[['quality']])

# Fifteen epochs of plain backprop.
trainer = BackpropTrainer(net, ds)
for _ in range(15):
    trainer.train()

# Probe the trained net on two hand-picked samples.
r1 = net.activate([12, .6, .6, 22])  # low
r2 = net.activate([5, .3, .3, 25])  #
print(r1)
print(r2)
# NOTE(review): fragment — inLayer, hiddenLayer, outLayer, net, n,
# numLabels, X_train and Y are defined earlier in the file.
theta1 = FullConnection(inLayer, hiddenLayer)
theta2 = FullConnection(hiddenLayer, outLayer)
# add connections to network
net.addConnection(theta1)
net.addConnection(theta2)
# sort module
net.sortModules()
# create a dataset object, make output Y a softmax matrix
allData = SupervisedDataSet(n, numLabels)
Y2 = convertToOneOfMany(Y)
# add data samples to dataset object, both ways are correct
allData.setField('input', X_train)
allData.setField('target', Y2)
#separate training and testing data
# (unlike the sibling script, everything is used for training here)
dataTrain = allData
# create object for training
train = BackpropTrainer(net, dataset=dataTrain, learningrate=0.03, momentum=0.3)
# evaluate correct output for trainer
#trueTrain = dataTrain['target'].argmax(axis=1)
# train step by step
def neuralNetworkRegression(X, Y, X_TEST, Y_TEST): """ :param X: data consisting of features (excluding class variable) :param Y: column vector consisting of class variable :return: models neural network regression with fine-tuning of epochs """ print "NEURAL NETWORK REGRESSION" print "Executing..." print try: print "Loading saved model..." net = pickle.load(open("Models/neural.sav", 'rb')) """ predict new value """ prediction = net.activate(X_TEST) print "Predicted: ",prediction," True: ", Y_TEST#, "Error: ",np.sqrt(mean_squared_error(map(float,Y_test), ridge.predict(X_test))) return prediction except: # can change to model on the entire dataset but by convention splitting the dataset is a better option X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size = 0.10, random_state = 5) Y_test = Y_test.reshape(-1,1) Y_train = Y_train.reshape(-1,1) RMSEerror = [] train = np.vstack((X_train, X_test)) # append both testing and training into one array outputTrain = np.vstack((Y_train, Y_test)) outputTrain = [float(s.item()) for s in outputTrain] outputTrain = np.asarray(outputTrain, dtype=np.float64) # print outputTrain outputTrain = outputTrain.reshape( -1, 1 ) inputSize = train.shape[1] targetSize = outputTrain.shape[1] ds = SupervisedDataSet(inputSize, targetSize) ds.setField('input', train) ds.setField('target', outputTrain) hiddenSize = 3 epochs = 10000 # got after parameter tuning # neural network training model net = buildNetwork( inputSize, hiddenSize, targetSize, hiddenclass=TanhLayer, bias = True ) trainer = BackpropTrainer(net, ds, learningrate=0.1) print "Model training in process..." 
train_mse, validation_mse = trainer.trainUntilConvergence(verbose = True, validationProportion = 0.15, maxEpochs = epochs, continueEpochs = 10) p = net.activateOnDataset(ds) mse = mean_squared_error(map(float, outputTrain), map(float, p)) rmse = mse ** 0.5 print "Root Mean Squared Error for Best Parameters : " + str(rmse) """ save model """ # pickle.dump(net, open("Models/neural.sav", 'wb')) """ predict new value """ prediction = net.activate(X_TEST) print "Predicted: ",prediction," True: ", Y_TEST#, "Error: ",np.sqrt(mean_squared_error(map(float,Y_test), ridge.predict(X_test))) return prediction
cart_t.append(0) bought_t.append(1) test_user_weekiter['cat_view'] = np.array(view_nu) test_user_weekiter['cat_cart'] = np.array(cart_nu) test_user_weekiter['cat_mark'] = np.array(mark_nu) test_user_weekiter['cat_bought'] = np.array(bought_nu) test_user_weekiter['view_tag'] = np.array(view_t) test_user_weekiter['mark_tag'] = np.array(mark_t) test_user_weekiter['bought_tag'] = np.array(bought_t) test_user_weekiter['cart_tag'] = np.array(cart_t) ds.setField( 'input', training[[ 'cat_view', 'cat_cart', 'cat_mark', 'cat_bought', 'view_tag', 'mark_tag', 'bought_tag,cart_tag' ]]) ds.setField('target', training['label_tag']) print("---------------make test data-------------------------------------") out = SupervisedDataSet(8, 1) test_item_pre = pd.merge(train_item_df, test_user_weekiter, on=['item_id', 'item_category'], how='inner') for i_ter in test_item_pre.index: out.addSample(
if y1 == 1: y.append([1, 0, 0 ,0]) elif y1 == 2: y.append([0, 1, 0 ,0]) elif y1 == 3: y.append([0, 0, 1 ,0]) elif y1 == 4: y.append([0, 0, 0 ,1]) <<<<<<< HEAD ds = SupervisedDataSet( 379, 4 ) ======= ds = SupervisedDataSet( params_len, 4 ) >>>>>>> svm2 ds.setField( 'input', trainParams['x'] ) ds.setField( 'target', y ) # init and train <<<<<<< HEAD net = buildNetwork( 379, hidden_size, 4, bias = True ) ======= net = buildNetwork( params_len, hidden_size, 4, bias = True ) >>>>>>> svm2 trainer = BackpropTrainer( net,ds ) print "training for {} epochs...".format( epochs ) #trainer.trainUntilConvergence(verbose=True)
# one_pred_y.append(p) # print "Test error: " + str(percentError(one_pred_y, one_test_y)) # print set(one_pred_y), len(one_pred_y) # print set(one_test_y), len(one_pred_y) # generate_report(trainer, one_pred_y, one_test_y) # converting g/b classes to numbers 0/1 temp_train_y, temp_test_y = two_train_y, two_test_y two_train_y = [1 if a == 'g' else 0 for a in temp_train_y] two_test_y = [1 if a == 'g' else 0 for a in temp_test_y] print "Ionosphere" input_size = len(two_train_x[0]) print input_size train_nnds = SupervisedDataSet(input_size, target_size) train_nnds.setField('input', two_train_x) two_train_reshaped = np.array(two_train_y).reshape(-1, 1) train_nnds.setField('target', two_train_reshaped) net = buildNetwork(input_size, hidden_size, target_size, bias=True) trainer = BackpropTrainer(net, train_nnds) epochs, train_error_data, test_error_data = [], [], [] for i in xrange(iterations): train_error = trainer.train() if i % 100 == 0 or i == iterations - 1: epochs.append(i) train_error_data.append(train_error) print "Train error", train_error two_pred_y = [] for row in two_test_x: p = int(round(net.activate(row)[0])) if p >= 1: p = 1