def trainModel(self):
    self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
    self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
    self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
    ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
    for rowData in self.finalDataSet:
        target = rowData[-1]
        variables = rowData[0:-1]
        ds.addSample(variables, target)
    self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
    self.testDataSet._convertToOneOfMany()
    self.trainDataSet._convertToOneOfMany()
    print self.testDataSet
    print self.trainDataSet
    self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput,
                            hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
    self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
    begin0 = time.time()
    # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
    for i in xrange(10):
        begin = time.time()
        self.trainer.trainEpochs(10)
        end = time.time()
        print 'iteration ', i, ' takes ', end - begin, 'seconds'
    end0 = time.time()
    print 'total time consumed: ', end0 - begin0
def importFromCSV(self, fileName, numInputs, numClasses):
    """
    Reads in a CSV file and passes it on to the pybrain neural net dataset
    structure to be used with the library's neural net classes. It expects
    the last columns (determined by numOutputs) to be the classification
    columns.
    """
    dataSet = None
    dataFile = open(fileName)
    line = dataFile.readline()
    data = [str(x) for x in line.strip().split(',') if x != '']
    if data[0] == '!labels:':
        labels = data[1:]
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses, class_labels=labels)
        line = dataFile.readline()
    else:
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses)
    while line != '':
        data = [float(x) for x in line.strip().split(',') if x != '']
        inputData = data[:numInputs]
        outputData = data[-1:]
        dataSet.addSample(inputData, outputData)
        line = dataFile.readline()
    dataFile.close()
    return dataSet
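# A hedged usage sketch for importFromCSV above (not part of the original
# snippet): the file name, dimensions, and the `loader` instance owning the
# method are illustrative assumptions. The optional first row follows the
# '!labels:' convention the function parses:
#
#     !labels:,setosa,versicolor,virginica
#     5.1,3.5,1.4,0.2,0
#     6.2,2.9,4.3,1.3,1
#
# dataSet = loader.importFromCSV('iris.csv', numInputs=4, numClasses=3)
# trainSet, testSet = dataSet.splitWithProportion(0.75)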
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData("Breast_test.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata, 'index': index}
def conductGeneration(generation, corpus):
    '''
    Conducts a generation of learning and testing on the input data

    generation (int) --- the number of the generation
    corpus (object) --- corpus object containing info needed
    '''
    # Set up the dataset skeleton
    alldata = ClassificationDataSet(2, 1, nb_classes=3, class_labels=['a', 'b', 'c'])

    # means = [(-1,0),(2,4),(3,1)]
    # cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    # alldata = ClassificationDataSet(2, 1, nb_classes=3)
    # for n in xrange(400):
    #     for klass in range(3):
    #         input = multivariate_normal(means[klass], cov[klass])
    #         print type(input)
    #         alldata.addSample(input, [klass])

    alldata.addSample((0, 1), (1))
    alldata.addSample((1, 0), (0))
    alldata.addSample((0, 0), (2))
    alldata.addSample((1, 1), (0))

    trndata, partdata = alldata.splitWithProportion(0.5)

    return alldata
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)

    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True,
                       hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."

    test_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0] * len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[featureMap[feat]] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))
    test_ds._convertToOneOfMany()
    tstresult = percentError(trainer.testOnClassData(dataset=test_ds), test_ds['class'])
    print tstresult
def generate_data():
    index = [
        8673, 1646, 116, 2191, 4326, 6718, 7796, 8531, 8763, 5646, 3626, 5451, 2004,
        8079, 4044, 6471, 675, 3746, 6338, 3149, 4880, 4869, 6213, 5316, 3544, 1046,
        7739, 8309, 4147, 5526, 5555, 1504, 1625, 2680, 5814, 1305, 3998, 794, 4355,
        6788, 3343, 867, 343, 3706, 6902, 4250, 9014, 5478, 788, 5323, 677,
        9215, 9214, 9213, 9212, 9211, 9210, 9209, 9208, 9207, 9206, 9205, 9204, 9203,
        9202, 9201, 9200, 9199, 9198, 9197, 9196, 9195, 9194, 9193, 9192, 9191, 9190,
        9189, 9188, 9187, 9186, 9185, 9184, 9183, 9182, 9181, 9180, 9179, 9178, 9177,
        9176, 9175, 9174, 9173, 9172, 9171, 9170, 9169, 9168, 9167, 9166, 9165, 9164,
        9163, 9162, 9161, 9160, 9159, 9158, 9157, 9156, 9155, 9154, 9153, 9152, 9151,
        9150, 9149, 9148, 9147, 9146, 9145, 9144, 9143, 9142, 9141, 9140, 9139, 9138,
        9137, 9136, 9135, 9134, 9133, 9132, 9131, 9130, 9129, 9128, 9127, 9126, 9125,
        9124, 9123, 9122, 9121, 9120, 9119, 9118, 9117, 9116, 9115, 9114, 9113, 9112,
        9111, 9110, 9109, 9108, 9107, 9106, 9105, 9104, 9103, 9102, 9101, 9100, 9099,
        9098, 9097, 9096, 9095, 9094, 9093, 9092, 9091, 9090, 9089, 9088, 9087, 9086,
        9085, 9084, 9083, 9082, 9081, 9080, 9079, 9078, 9077, 9076, 9075, 9074, 9073,
        9072, 9071, 9070, 9069, 9068, 9067
    ]
    INPUT_FEATURES = 200
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData("Breast_train.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata, 'index': index}
def createnetwork(n_hoglist, n_classlist, n_classnum, n_hiddensize=100):
    n_inputdim = len(n_hoglist[0])
    n_alldata = ClassificationDataSet(n_inputdim, 1, nb_classes=n_classnum)
    for i in range(len(n_hoglist)):
        n_input = n_hoglist[i]
        n_class = n_classlist[i]
        n_alldata.addSample(n_input, [n_class])
    n_tstdata, n_trndata = n_alldata.splitWithProportion(0.25)
    n_trndata._convertToOneOfMany()
    n_tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(n_trndata)
    print "Input and output dimensions: ", n_trndata.indim, n_trndata.outdim
    print "First sample (input, target, class):"
    print n_trndata['input'][0], n_trndata['target'][0], n_trndata['class'][0]

    n_fnn = buildNetwork(n_trndata.indim, n_hiddensize, n_trndata.outdim, outclass=SoftmaxLayer)
    n_trainer = BackpropTrainer(n_fnn, dataset=n_trndata, momentum=0.1, verbose=True, weightdecay=0.01)

    n_result = 1
    while n_result > 0.1:
        print n_result
        n_trainer.trainEpochs(1)
        n_trnresult = percentError(n_trainer.testOnClassData(), n_trndata['class'])
        n_tstresult = percentError(n_trainer.testOnClassData(dataset=n_tstdata), n_tstdata['class'])
        print "epoch: %4d" % n_trainer.totalepochs, \
            " train error: %5.2f%%" % n_trnresult, \
            " test error: %5.2f%%" % n_tstresult
        n_result = n_tstresult
def batch_classify(self, samples):
    ds = ClassificationDataSet(len(self._fx))
    for sample in samples:
        fvec = [sample[l] for l in self._fx]
        ds.addSample(fvec, [0])
    results = self._trainer.testOnClassData(ds)
    return [self._rmap[r] for r in results]
def gen_data(csv_file, db):
    keywords = {}
    count = 0
    img_list = []
    with open(csv_file) as f:  # the with block closes the file; no explicit close needed
        content = f.readlines()
    for line in content:
        aux = line.replace('\n', '').split(',')
        if aux[1] not in keywords:
            keywords[aux[1]] = count
            count += 1
        img_list.append(aux)
    data = ClassificationDataSet(768, len(keywords), nb_classes=len(keywords))
    n = len(keywords)
    for img in img_list:
        path = db + '/' + img[0]
        im = Image.open(path).convert('RGB')
        data.addSample(get_img_feats(im), get_keyword_class(keywords[img[1]], n))
    return data, n, keywords
def prepare_datasets(inp, out, dataframe, ratio):
    '''conversion from pandas dataframe to ClassificationDataSet of numpy
    parameters:
        inp: list of names of input features
        out: list of names of output features (target values)
        ratio: ratio of dimension of test to train dataset
    '''
    inp_dim = len(inp)
    out_dim = len(out)
    no_classes = 2
    alldata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    inp = dataframe[inp]
    out = dataframe[out]
    #for [a,b,c],d in zip(inp.values,out.values):
    for i in range(len(inp.values)):
        d = out.values[i]
        if d == 'up':
            d = 0
        elif d == 'down':
            d = 1
        else:
            d = 2
        alldata.addSample(inp.values[i], d)
    tstdata_temp, trndata_temp = alldata.splitWithProportion(ratio)
    # convert the supervised datasets returned by the split back to
    # classification datasets; note these must be two separate objects
    # (the original bound tstdata and trndata to the same instance)
    tstdata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    trndata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return alldata, trndata, tstdata
def _convert_supervised_to_classification(supervised_dataset, classes):
    classification_dataset = ClassificationDataSet(supervised_dataset.indim, supervised_dataset.outdim, classes)
    for n in xrange(0, supervised_dataset.getLength()):
        classification_dataset.addSample(supervised_dataset.getSample(n)[0], supervised_dataset.getSample(n)[1])
    return classification_dataset
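# A minimal sketch of why the helper above exists (added for context; the toy
# XOR data is an illustrative assumption): in some PyBrain releases
# splitWithProportion() hands back plain SupervisedDataSet halves even when
# called on a ClassificationDataSet, which is why several snippets in this
# collection re-wrap the halves before calling _convertToOneOfMany().
from pybrain.datasets import ClassificationDataSet

alldata = ClassificationDataSet(2, 1, nb_classes=2)
alldata.addSample((0, 0), [0])
alldata.addSample((0, 1), [1])
alldata.addSample((1, 0), [1])
alldata.addSample((1, 1), [0])
tst_raw, trn_raw = alldata.splitWithProportion(0.25)
tstdata = _convert_supervised_to_classification(tst_raw, 2)
trndata = _convert_supervised_to_classification(trn_raw, 2)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()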
def ann(training_filename, testing_filename, itr, epoch, model_type):
    training_start_time = "The generation of data set and training started at :%s" % datetime.datetime.now()
    training_dataset = np.genfromtxt(training_filename, skip_header=0, dtype="int", delimiter='\t')
    data = ClassificationDataSet(len(training_dataset[0]) - 1, 2, nb_classes=2)
    for aSample in training_dataset:
        data.addSample(aSample[0:len(aSample) - 1], [aSample[len(aSample) - 1]])
    # data._convertToOneOfMany()
    fann = buildNetwork(314, 2, outclass=SoftmaxLayer)  # note: 314 is a hardcoded input dimension
    trainer = BackpropTrainer(fann, dataset=data, momentum=0.1, verbose=False, weightdecay=0.01)
    counter = 0
    print training_start_time
    while counter < itr:
        trainer.trainEpochs(epoch)
        counter = counter + 1
    trnresult = percentError(trainer.testOnClassData(), data['class'])
    trained_result_log = "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult
    training_time_end = "The training and result logging ended at %s :" % datetime.datetime.now()
    filename = working_dir + "\models\\" + model_type + ".obj"
    save_trained_model(fann, filename)
    log_file.write("\n" + training_start_time + "\n")
    log_file.write(str(trained_result_log) + "\n")
    log_file.write(training_time_end + "\n")
def generate_data():
    INPUT_FEATURES = 9216
    CLASSES = 5
    train_text, train_classfi = getTargetData("Breast_train.data")
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata}
def getData():
    fo = open("C:\\Program Files (x86)\\Lux\\Support\\data1per.txt")
    #data = []
    '''
    correctinds = range(0,5)
    for k in range(5, 131, 3):
        correctinds.append(k)
    correctinds.append(129)
    correctinds.append(130)
    for k in range(131, 257, 3):
        correctinds.append(k)
    correctinds.append(255)
    correctinds.append(256)
    '''
    #alldata = ClassificationDataSet(92, 1)
    alldata = ClassificationDataSet(84, 1)
    count = 0
    for line in fo.readlines():
        #for k in range(0, 20000):
        count += 1
        #line = fo.readline()
        line = [int(x.strip()) for x in line[1:-3].split(',')]
        line = [line[0]] + line[4:47] + line[49:90]
        alldata.addSample(line[1:], line[0])
    print count
    return alldata
class NeuralNetLearner:
    def __init__(self):
        self.bunch = load_digits()
        self.X = np.asarray(self.bunch.data, 'float32')
        self.Y = np.asarray(self.bunch.target, 'float32')
        #self.X, self.Y = nudge_dataset(self.X, self.bunch.target)
        self.X = (self.X - np.min(self.X, 0)) / (np.max(self.X, 0) + 0.0001)  # 0-1 scaling

        self.ds = ClassificationDataSet(64, nb_classes=10, class_labels=self.bunch.target_names)
        for (x, y) in zip(self.X, self.Y):
            self.ds.addSample(x, y)

        self.test_data, self.train_data = self.ds.splitWithProportion(0.3)
        self.network = buildNetwork(64, 10, 1)

    def get_datasets(self):
        return self.train_data, self.test_data

    def activate(self, x):
        return self.network.activate(x)  # the original was missing the return

    def fitness_func(self, x):
        if not (x.size == 64):
            print("Bad input vector: ", x)
            return
        sum_of_squared_error = 0
        for (input, target) in self.ds:
            sum_of_squared_error += (target - self.activate(input)) ** 2  # square the residual (the original omitted it)
        return (sum_of_squared_error / len(self.ds))  # datasets have no .length attribute; use len()

    def get_weights(self):
        return
def toClassificationDataset(codedSampleSet):
    classifiedSampleSet = []

    # Calculate the unique classes
    classes = []
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        if classifier not in classes:
            classes.append(classifier)
    classes.sort()

    # Now that we have all the classes, we process the outputs
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        classifiedSample = one_to_n(classes.index(classifier), len(classes))
        classifiedSampleSet.append(classifiedSample)

    # Build the dataset
    sampleSize = len(codedSampleSet[0])
    classifiedSampleSize = len(classifiedSampleSet[0])
    dataset = ClassificationDataSet(sampleSize, classifiedSampleSize)

    for i in range(len(classifiedSampleSet)):
        dataset.addSample(codedSampleSet[i], classifiedSampleSet[i])

    return dataset, classes
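# Hypothetical call for toClassificationDataset above; getClassifier() and the
# shape of codedSampleSet are assumptions, since they are defined elsewhere in
# the original project:
# dataset, classes = toClassificationDataset(codedSampleSet)
# tstdata, trndata = dataset.splitWithProportion(0.25)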
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX, minY, maxY
    #
    #for i in range(n):
    #    for klass in range(CLASSES):
    #        features = multivariate_normal(means[klass], cov[klass])
    #        #print means[klass], cov[klass]
    #        #print features
    #        x, y = features
    #        minX, maxX = min(minX, x), max(maxX, x)
    #        minY, maxY = min(minY, y), max(maxY, y)
    #        alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0, 0], [0])
    alldata.addSample([0, 1], [1])
    alldata.addSample([1, 0], [1])
    alldata.addSample([1, 1], [0])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata}
def read_data(filename):
    """
    See http://www.pybrain.org/docs/api/datasets/classificationdataset.html

    Reads a (naive) csv file of data and converts it into a
    ClassificationDataSet. 'Naive' in this case means the data can be parsed
    by splitting on commas - i.e., no quotations or escapes. I picked this
    file format because it should be trivial to convert all our data into it.

    Raises an exception when an IO error occurs.

    Parameters:
        filename - The name of the file containing the data.
    """
    data_file = open(filename, "r")
    data_lines = [line.split(',') for line in data_file.readlines()]
    data_file.close()

    features = [[float(f) for f in line[0:-1]] for line in data_lines]
    classes = [[int(line[-1])] for line in data_lines]

    # Workaround to make classifications zero-based
    class_min = min([c[0] for c in classes])
    for i in range(len(classes)):
        classes[i][0] -= class_min

    data_set = ClassificationDataSet(len(features[0]))
    for feature_vector, classification in zip(features, classes):
        data_set.addSample(feature_vector, classification)

    return data_set
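# A hypothetical round-trip for read_data above; 'toy.csv' and its contents are
# assumptions matching the naive format the docstring describes (float
# features, integer class in the last column; classes are re-based to start
# at 0):
#
#     0.0,0.0,1
#     0.0,1.0,2
#     1.0,0.0,2
#     1.0,1.0,1
#
# ds = read_data('toy.csv')
# print len(ds), ds.indim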
class NNetwork:
    def __init__(self):
        self.ds = ClassificationDataSet(7, 1, nb_classes=8)  # 8 since we have 8 gestures, 7 since we have 7 features

    def add_data(self, training_data):
        # a method to add all the training data we have
        for gesture in training_data:
            self.ds.addSample(gesture[1], gesture[0])

    def newData(self, training_data):
        # a method for replacing the data already existing and adding data from scratch
        self.ds = ClassificationDataSet(7, 1, nb_classes=8)
        for gesture in training_data:
            self.ds.addSample(gesture[1], gesture[0])

    def train(self, shouldPrint):
        tstdata, trndata = self.ds.splitWithProportion(0.2)  # splits the data into training and verification data
        trndata._convertToOneOfMany()
        tstdata._convertToOneOfMany()
        self.fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer)  # builds a network with 64 hidden neurons
        self.trainer = BackpropTrainer(self.fnn, dataset=trndata, momentum=0.1,
                                       learningrate=0.01, verbose=True, weightdecay=0.1)  # uses the backpropagation algorithm
        self.trainer.trainUntilConvergence(dataset=trndata, maxEpochs=100, verbose=True,
                                           continueEpochs=10, validationProportion=0.20)  # early stopping with 20% as validation data
        trnresult = percentError(self.trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(self.trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        if shouldPrint:
            print "epoch: %4d" % self.trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult

    def activate(self, data):
        # tests a particular data point (feature vector)
        return self.fnn.activate(data)
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_ouputs=None, classes=None, class_lables=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs, num_ouputs,
                                              nb_classes=classes, class_labels=class_lables)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim, outclass=SoftmaxLayer)  # feed forward network

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25, maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test), self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train), self.train['class'])
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
def generateDataSet():
    inFile = open("data/input.txt")
    inData = inFile.readlines()
    inFile.close()
    outFile = open("data/output.txt")
    outData = outFile.readlines()
    outFile.close()
    inputs = 120  # you will want to update this based on the state you have...
                  # I don't understand this comment. How do we update if we haven't calculated the state yet?
    classes = 11  # Not much reason to change this one, there are only 11 destinations.
    allData = ClassificationDataSet(inputs, 1, nb_classes=classes)
    start = time.clock()
    for i in range(len(inData)):
        b = loadBrain(inData[i].strip())
        #inputs = len(b.g.heroes) - 1 + len(b.g.taverns_locs) + 4
        # calls functions inside of the ai object. you will want to write these fcns.
        ins = b.createInputs(inputs)
        klass = b.determineClass(classes, eval(outData[i].strip()))
        expectedKlass = b.classInverse(klass)
        #if expectedKlass != eval(outData[i].strip()):
        #    print expectedKlass, eval(outData[i].strip())
        allData.addSample(ins, [klass])
        #if(i > 1000): break
        if i % 100 == 0:
            print i, len(inData), "elapsed between sets", time.clock() - start
    return allData
def getdata(do_preprocessing, full_data):
    '''
    fetch and format the match data according to the given flags
    do_preprocessing: bool: True if the data should be preprocessed
    full_data: bool: False if the minimal data should be used
    '''
    print("fetching data ...")
    if full_data == 0:
        fn = getMinimalDatafromMatch
    else:
        fn = getBasicDatafromMatch
    if globals.use_saved_data:
        try:
            with open('processed_data%d' % full_data) as outfile:
                data = json.load(outfile)
        except IOError:
            matches = Match.objects.all()
            data = map(lambda x: (fn(x, do_preprocessing, False), x.won), matches)
            data += map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches)
            with open('processed_data%d' % full_data, 'w') as outfile:
                json.dump(data, outfile)
    else:
        matches = Match.objects.all()
        data = map(lambda x: (fn(x, do_preprocessing, False), x.won), matches)
        data += map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches)
        with open('processed_data%d' % full_data, 'w') as outfile:
            json.dump(data, outfile)
    all_data = None
    for input, won in data:
        if all_data is None:
            all_data = ClassificationDataSet(len(input), 1, nb_classes=2)
        all_data.addSample(input, int(won))
    return all_data
class neuralNetwork():
    def __init__(self, n_classes):
        self.n_classes = n_classes

    def fit(self, X, Y):
        n_features = X.shape[1]
        self.train_ds = ClassificationDataSet(n_features, 1, nb_classes=self.n_classes)
        for train, target in zip(X, Y):
            self.train_ds.addSample(train, [target])
        self.train_ds._convertToOneOfMany()
        self.net = buildNetwork(self.train_ds.indim, 2 * n_features, self.train_ds.outdim, outclass=SoftmaxLayer)
        self.trainer = BackpropTrainer(self.net, self.train_ds)

    def predict(self, X):
        n_features = X.shape[1]
        self.test_ds = ClassificationDataSet(n_features, 1, nb_classes=self.n_classes)
        for test in X:
            self.test_ds.addSample(test, [1])  # dummy target; only the inputs are used for activation
        self.test_ds._convertToOneOfMany()
        for i in range(100):
            self.trainer.trainEpochs(5)
        self.labels = self.net.activateOnDataset(self.test_ds)
        self.labels = self.labels.argmax(axis=1)
        return self.labels
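# A sketch of driving the wrapper above with numpy arrays (the toy XOR data is
# an illustrative assumption; note that predict(), as written, trains for
# another 100 x 5 epochs before labelling):
import numpy as np

X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
Y = np.array([0, 1, 1, 0])
clf = neuralNetwork(n_classes=2)
clf.fit(X, Y)
print clf.predict(X)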
def make_data_set(beg, end):
    ds = ClassificationDataSet(HISTORY * 2 + 1, class_labels=['None', 'Buy', 'Sell'])  # SupervisedDataSet(HISTORY*3, 1)
    trainQ = rawData[(rawData.tradeDate <= end) & (rawData.tradeDate >= beg)]
    for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD - 1):
        cur = idx + HISTORY - 1
        if abs(trainQ.iloc[cur]['MACD']) > 0.5:
            continue
        sample = []
        for i in range(HISTORY):
            #sample.append(trainQ.iloc[idx+i]['EMAL'])  # [['EMAL','DIFF','DEA','CDIS']]
            sample.append(trainQ.iloc[idx + i]['DIFF'])
            sample.append(trainQ.iloc[idx + i]['DEA'])
        sample.append(trainQ.iloc[cur]['CDIS'])
        if max(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] > 1.05:
            answer = 1
        elif min(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] < 0.95:
            answer = 2
        else:
            answer = 0
        # print(sample)
        ds.addSample(sample, answer)
    return ds
def main():
    for stock in STOCK_TICKS:
        # Download Data
        get_data(stock)

        # Import Data
        days = extract_data(stock)
        today = days.pop(0)

        # Make DataSet
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])

        # Make Network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train Network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Activate Network
        prediction = network.activate(today.return_metrics())
        print prediction
def build_dataset(mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE,
                  nb_classes=2, edgedetect=True, transform=True):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet
    patch_size = min(patch_size, orig_size)
    trim = round((orig_size - patch_size) / 2)
    # ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size ** 2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype="uint8")
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim:(trim + patch_size), trim:(trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print("New dataset contains %d images (%d positive)." % (len(ds), sum(ds["target"])))
    return ds
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs, numHiddenNodes, numTrainingEpochs):
    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]

    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)
    trndata._convertToOneOfMany()

    # When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses, outclass=SoftmaxLayer, bias=True, recurrent=False)
    # Read from file after the first try.
    # myNetwork = NetworkReader.readFrom('firstTime.xml')  # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
class EightBitBrain(object):

    def __init__(self, dataset, inNodes, outNodes, hiddenNodes, classes):
        self.__dataset = ClassificationDataSet(inNodes, classes - 1)
        for element in dataset:
            self.addDatasetSample(self._binaryList(element[0]), element[1])
        self.__dataset._convertToOneOfMany()
        self.__network = buildNetwork(inNodes, hiddenNodes, self.__dataset.outdim, recurrent=True)
        self.__trainer = BackpropTrainer(self.__network, learningrate=0.01, momentum=0.99, verbose=True)
        self.__trainer.setData(self.__dataset)

    def _binaryList(self, n):
        return [int(c) for c in "{0:08b}".format(n)]

    def addDatasetSample(self, argument, target):
        self.__dataset.addSample(argument, target)

    def train(self, epochs):
        self.__trainer.trainEpochs(epochs)

    def activate(self, information):
        result = self.__network.activate(self._binaryList(information))
        highest = (0, 0)
        for resultClass in range(len(result)):
            if result[resultClass] > highest[0]:
                highest = (result[resultClass], resultClass)
        return highest[1]
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    # ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()
    net = shortcuts.buildNetwork(train.indim, 1000, train.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=train, momentum=0.1, learningrate=0.01, weightdecay=0.05)
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, ds)
    # print cv.validate()
    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train["class"])
    # trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train), train["class"])
        testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
class ImageData(Data):
    image_x = 1
    image_y = 1
    images = []
    targets = []

    def __init__(self, images, targets, image_x, image_y, description="Image Data", outputs=1):
        Data.__init__(self, description, outputs)
        self.images = images
        self.targets = targets
        self.image_x = image_x
        self.image_y = image_y
        self.create_classifier()

    def create_classifier(self):
        #print "Image X:", self.image_x
        #print "Image Y:", self.image_y
        vector_length = self.image_x * self.image_y

        # Create the classifier
        #print "Creating Classifier. Vector_Len:", vector_length, "Output Vector:", self.outputs
        self.classifier = ClassificationDataSet(vector_length, self.outputs, nb_classes=(len(self.images) / 10))

        #print "Adding samples for", len(self.images), " images"
        for i in xrange(len(self.images)):
            # Assign images to their targets in the classifier
            #print i, "Image:", self.images[i], "Target:", self.targets[i]
            self.classifier.addSample(self.images[i], self.targets[i])

    def print_data(self):
        print "Image Object:" + str(self.data_unit)  # was `this.data_unit`, a JavaScript-ism

    def add_image(self, image, target):
        self.images.append(image)
        self.targets.append(target)
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]

alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
#input = np.array([ myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15],myclones_data[n][11],myclones_data[n][12], myclones_data[n][26], myclones_data[n][27]] )
for n in xrange(len(myclones_data)):
    #for klass in range(3):
    input = np.array([myclones_data[n][16], myclones_data[n][17], myclones_data[n][18],
                      myclones_data[n][15], myclones_data[n][11], myclones_data[n][12],
                      myclones_data[n][26], myclones_data[n][27]])
    #print (n, "-->", input)
    alldata.addSample(input, int(myclones_data[n][35]))

tstdata, trndata = alldata.splitWithProportion(0.85)
print("Class Label --> ", int(tstdata.getSample(1)[1]))

tmp_tst_for_validation = tstdata

tstdata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
for n in xrange(0, tstdata.getLength()):
    tstdata_new.addSample(tstdata.getSample(n)[0], tstdata.getSample(n)[1])

trndata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.tools.xml.networkreader import NetworkReader

'''
Function or Class
'''
if __name__ == "__main__":
    X = datasets.load_iris()['data']
    y = datasets.load_iris()['target']
    Dim = X.shape[1]
    NNData = ClassificationDataSet(Dim)
    for Idx in range(len(X)):
        NNData.addSample(np.ravel(X[Idx]), y[Idx])
    TrainData, TestData = NNData.splitWithProportion(0.25)
    TrainData._convertToOneOfMany()
    TestData._convertToOneOfMany()
    print TrainData.indim
    print TrainData.outdim
    HiddenNum = int(len(TrainData) / float(2 * (TrainData.indim + TrainData.outdim)))
    print HiddenNum
    NNNetwork = buildNetwork(TrainData.indim, HiddenNum, TrainData.outdim, outclass=SoftmaxLayer)
#############################################################################
# [set Data]
#CSV_TRAIN = "dataset/train_na2zero.csv"
#CSV_TEST = "dataset/test_na2zero.csv"
CSV_TRAIN = "dataset/train_zero_60x60.csv"
CSV_TEST = "dataset/test_zero_60x60.csv"

df_train = pd.read_csv(CSV_TRAIN)
Y = df_train.y
Y = Y - 1  # in order to make target in the range of [0, 1, 2, 3, ...., 11]
X = df_train.iloc[:, 1:].values

alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=12)
for i in range(X.shape[0]):
    alldata.addSample(X[i, :], [Y[i]])
alldata._convertToOneOfMany()

df_test = pd.read_csv(CSV_TEST)
test_X = df_test.iloc[:, 1:].values

print "Number of training patterns: ", len(alldata)
print "Input and output dimensions: ", alldata.indim, alldata.outdim
print "First sample (input, target, class):"
print alldata['input'][0], alldata['target'][0], alldata['class'][0]

#############################################################################
# fnn
n = buildNetwork(alldata.indim, 1000, 1000, 1000, alldata.outdim, outclass=SoftmaxLayer, bias=True)
print("\n[ Network Structure]\n", n)
def perceptron(hidden_neurons=5, weightdecay=0.01, momentum=0.1):
    INPUT_FEATURES = 2
    CLASSES = 3
    HIDDEN_NEURONS = hidden_neurons
    WEIGHTDECAY = weightdecay
    MOMENTUM = momentum

    # Generate the labeled set
    g = generate_data()
    #g = generate_data2()
    alldata = g['d']
    minX, maxX, minY, maxY = g['minX'], g['maxX'], g['minY'], g['maxY']

    # Split data into test and training dataset
    tstdata, trndata = alldata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()  # This is necessary, but I don't know why
    tstdata._convertToOneOfMany()  # http://stackoverflow.com/q/8154674/562769

    print("Number of training patterns: %i" % len(trndata))
    print("Input and output dimensions: %i, %i" % (trndata.indim, trndata.outdim))
    print("Hidden neurons: %i" % HIDDEN_NEURONS)
    print("First sample (input, target, class):")
    print(trndata['input'][0], trndata['target'][0], trndata['class'][0])

    fnn = buildNetwork(trndata.indim, HIDDEN_NEURONS, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=MOMENTUM, verbose=True, weightdecay=WEIGHTDECAY)

    # Visualization
    ticksX = arange(minX - 1, maxX + 1, 0.2)
    ticksY = arange(minY - 1, maxY + 1, 0.2)
    X, Y = meshgrid(ticksX, ticksY)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])

    for i in range(20):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult)

        out = fnn.activateOnDataset(griddata)
        out = out.argmax(axis=1)  # the highest output activation gives the class
        out = out.reshape(X.shape)

        figure(1)  # always print on the same canvas
        ioff()  # interactive graphics off
        clf()  # clear the plot
        for c in [0, 1, 2]:
            here, _ = where(tstdata['class'] == c)
            plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
        if out.max() != out.min():  # safety check against flat field
            contourf(X, Y, out)  # plot the contour
        ion()  # interactive graphics on
        draw()  # update the plot

    ioff()
    show()
train_data = ClassificationDataSet(n_features, 1, nb_classes=2)
test_data = ClassificationDataSet(n_features, 1, nb_classes=2)
all_data = ClassificationDataSet(n_features, 1, nb_classes=2)
# train_data = SupervisedDataSet(n_features, 1)
# test_data = SupervisedDataSet(n_features, 1)
# all_data = SupervisedDataSet(n_features, 1)

target = (y == 1) * 1
# target = y + 1
# target = y

for i in xrange(N_train):
    if y[i] != 0:
        train_data.addSample(X_new[i, ], [target[i]])

for i in xrange(N_train + 1, N_test_end):
    if y[i] != 0:
        test_data.addSample(X_new[i, ], [target[i]])

for i in xrange(X_new.shape[0]):
    all_data.addSample(X_new[i, ], [target[i]])

train_data._convertToOneOfMany()
test_data._convertToOneOfMany()
all_data._convertToOneOfMany()

print("building")
fnn = buildNetwork(train_data.indim, 6,
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.xml.networkwriter import NetworkWriter

ds = ClassificationDataSet(79, 1, nb_classes=2)

tf = open('./finalFeaturestraining.csv', 'rb')
for line in tf.readlines():
    data = [float(x) for x in line.strip().split(',') if x != '']
    indata = tuple(data[:79])
    # print len(indata)
    outdata = tuple(data[79:80])
    # print len(outdata)
    ds.addSample(indata, outdata)

tstdata, trndata = ds.splitWithProportion(0.0)

n = buildNetwork(trndata.indim, 30, 30, trndata.outdim, recurrent=True)
t = BackpropTrainer(n, dataset=trndata, learningrate=0.001, momentum=0.3, verbose=True)
t.trainEpochs(100)
trnresult = percentError(t.testOnClassData(), trndata['class'])
# tstresult = percentError(t.testOnClassData(dataset=tstdata), tstdata['class'])
print "epoch: %4d" % t.totalepochs, \
    " train error: %5.2f%%" % trnresult
def main():
    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')

    # getting in the data from csv files and making it suitable for further action
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])
    scaler = preprocessing.StandardScaler().fit(x_train)  # standardization plays an important role in all NN algos
    x_train = scaler.transform(x_train)  # final x_train

    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])
    x_test = scaler.transform(x_test)  # final x_test

    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(in_data)):
        alltraindata.addSample(x_train[count], [y_train[count]])
    alltraindata._convertToOneOfMany(bounds=[0, 1])

    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(out_data)):
        alltestdata.addSample(x_test[count], [y_test[count]])
    alltestdata._convertToOneOfMany(bounds=[0, 1])

    numRBFCenters = 50
    kmeans = KMeans(n_clusters=numRBFCenters)  # KMeans to find the centroids for the RBF neurons
    kmeans.fit(alltraindata['input'])
    centers = kmeans.cluster_centers_  # centers.shape = (numRBFCenters, n_features)
    cluster_distance = kmeans.transform(alltraindata['input'])
    # cluster_distance.shape = (n_samples, numRBFCenters) and kmeans.labels_.shape = (n_samples,)

    # Calculating the sigma/smoothness parameter of each Radial Basis Function.
    # It is the standard deviation of the points of each cluster, giving a
    # value for each RBF center.
    distance_std = []
    distance_within_cluster = []
    for lab in range(numRBFCenters):
        for x, label in enumerate(kmeans.labels_):
            if label == lab:
                distance_within_cluster.append(cluster_distance[x][label])
        distance_std.append(np.std(distance_within_cluster))

    rbf = RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters, centers,
                distance_std)  # passing the centers array for RBFNN initialization
    rbf.train(alltraindata['input'], alltraindata['target'])

    testdata_target = rbf.test(alltestdata['input'])  # values obtained after testing, a 'n x outdim' matrix
    testdata_target = testdata_target.argmax(axis=1)  # the highest output activation gives the predicted class
    traindata_target = rbf.test(alltraindata['input'])
    traindata_target = traindata_target.argmax(axis=1)  # the highest output activation gives the predicted class

    # compare to y_test to obtain the accuracy
    # count = 0
    # for x in range(len(y_test)):
    #     if testdata_target[x] == y_test[x]:
    #         count += 1
    # tstresult2 = float(count) / float(len(y_test)) * 100

    trnresult = percentError(traindata_target, alltraindata['class'])
    tstresult = percentError(testdata_target, alltestdata['class'])
    print "Accuracy on train data is: %5.2f%%," % (100 - trnresult)
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)

    for x in range(len(y_test)):
        if y_test[x]:  # was `any(y_test[x]) == True`, which fails on scalar entries
            y_test[x] = 1
        else:
            y_test[x] = 0

    average_label = ['micro', 'macro', 'weighted']
    for label in average_label:
        f1 = f1_score(y_test, testdata_target, average=label)
        print "f1 score (%s)" % label, "is ", f1
if __name__ == "__main__":
    breast_cancer = datasets.load_breast_cancer()
    X, y = breast_cancer.data, breast_cancer.target

    model = cluster.KMeans(n_clusters=2)
    labels = model.fit_predict(X)
    print X.shape
    X = np.concatenate((X, np.expand_dims(labels, axis=1)), axis=1)
    print X.shape

    ds = ClassificationDataSet(X.shape[1], 2)
    for k in xrange(len(X)):
        ds.addSample(X[k], y[k])
    tstdata, trndata = ds.splitWithProportion(0.3)

    max_epochs = 1000

    # List all the different networks we want to test
    net = buildNetwork(trndata.indim, 15, trndata.outdim, outclass=SigmoidLayer, bias=True)
    print net

    # Setup a trainer that will use backpropagation for training
    trainer = BackpropTrainer(net,
net.sortModules()
print net

digits = load_digits()
X, y = digits.data, digits.target
print(X.shape)

plt.gray()
plt.matshow(digits.images[2])
plt.show()

daSet = ClassificationDataSet(len(t), 1)
for k in xrange(len(X)):
    daSet.addSample(X.ravel()[k], y.ravel()[k])

testData, trainData = daSet.splitWithProportion(0.25)
trainData._convertToOneOfMany()
testData._convertToOneOfMany()

#for inpt, target in daSet:
#    print inpt, target

trainer = BackpropTrainer(net, dataset=trainData, momentum=0.1, learningrate=0.01, verbose=True)
trainer.trainEpochs(50)
print "Contents of cov", cov # creating the Dataset to add all the data #arguments: # input, output, nb_classes=3 alldata = ClassificationDataSet(2, 1, nb_classes=3) print "initial contents of the data", alldata for n in xrange(400): for klass in range(3): # print "value of klass", klass #so we are choosing some value of mean and some value of variance and then #then adding all the data to the sample. input = multivariate_normal(means[klass], cov[klass]) # print "here is the input", input alldata.addSample(input, [klass]) print "the length of the final dataset is ", len(alldata) #I am finding the length of the entire dataset and that is expected to be 1200 which it is. The # the thing that I am more concerned about is that when you print out the alldata, you get a wierd # number like 2056 or 2048 print "here is the whole data " print "*" * 20 print alldata # splitting the data between the trainings_et and testing_set tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25) # tstdata, trndata = alldata.splitWithProportion( 0.25 ) #here are are just copying the data from the temp variables to the actual vaiables that I will be using tstdata = ClassificationDataSet(2, 1, nb_classes=3) for n in xrange(0, tstdata_temp.getLength()):
def main():
    """
    CLI Arguments allowed:
        --display_graphs            Displays graphs
        --retrain                   Trains a new model
        --cross-validate            Runs cross validation to fine tune the model
        --test=validation_set       Tests the latest trained model against the validation set
        --test=test_set             Tests the latest trained model against the test set
    """
    global trainer, classifier
    inputs_train, targets_train, inputs_valid, targets_valid, inputs_test, targets_test = load_parsed_data()
    if '--display_graphs' in sys.argv:
        display_graphs = True

    print('using {} percent of all data in corpus'.format(PERCENTAGE_DATA_SET_TO_USE * 100))
    print('using {} most common words as features'.format(NUM_FEATURES))

    if not trained_model_exists() or '--retrain' in sys.argv:
        train_features, valid_features, test_features = extract_features(
            inputs_train[:len(inputs_train) * PERCENTAGE_DATA_SET_TO_USE],
            targets_train[:len(targets_train) * PERCENTAGE_DATA_SET_TO_USE],
            inputs_valid[:len(inputs_valid) * PERCENTAGE_DATA_SET_TO_USE],
            targets_valid[:len(targets_valid) * PERCENTAGE_DATA_SET_TO_USE],
            inputs_test[:len(inputs_test) * PERCENTAGE_DATA_SET_TO_USE],
            targets_test[:len(targets_test) * PERCENTAGE_DATA_SET_TO_USE]
        )
        save_features(train_features, valid_features, test_features)

        pca = RandomizedPCA(n_components=N_COMPONENTS, whiten=False).fit(train_features)
        save_pca(pca)
        print("Saved PCA")

        X_train = pca.transform(train_features)
        X_valid = pca.transform(valid_features)
        pca = None
        print("Created PCAd features")

        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])  # was targets_test, which mismatches the validation inputs
        valid_data._convertToOneOfMany()
        X_valid = None

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample(X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = buildNetwork(train_data.indim, N_HIDDEN, train_data.outdim, outclass=SoftmaxLayer)
        trainer = BackpropTrainer(classifier, dataset=train_data, momentum=0.1, learningrate=0.01, verbose=True)
        train_model(train_data, valid_data)
        save_model(classifier)
        train_data = None
        valid_data = None
    else:
        train_features, valid_features, test_features = load_features()
        pca = load_pca()
        X_train = pca.transform(train_features)
        pca = None
        print("Created PCAd features")

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample(X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = load_trained_model()
        trainer = BackpropTrainer(classifier, dataset=train_data, momentum=0.1, learningrate=0.01, verbose=True)

    if '--test=validation_set' in sys.argv:
        print("Running against validation set")
        pca = load_pca()
        X_valid = pca.transform(valid_features)
        pca = None
        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])  # was targets_test
        valid_data._convertToOneOfMany()
        X_valid = None
        make_prediction(valid_data)

    if '--test=test_set' in sys.argv:
        print("Running against test set")
        pca = load_pca()
        X_test = pca.transform(test_features)
        pca = None
        test_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_test)):
            test_data.addSample(X_test[i], targets_test[i])
        test_data._convertToOneOfMany()
        y_pred = trainer.testOnClassData(dataset=test_data)
        plot_precision_and_recall(y_pred, targets_test[:len(targets_test) * PERCENTAGE_DATA_SET_TO_USE])
        X_test = None
        make_prediction(test_data)
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
print tstdata

fnn = buildNetwork(trndata.indim, 3, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

for i in range(10):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
    # removing not floats
    crimes = crimes.drop(['Dates', 'DayOfWeek', 'Address', 'date_obj', 'Descript'], axis=1)
    crimes = crimes.drop(['Category', 'PdDistrict', 'Resolution'], axis=1)
    return crimes

print "preprocessing"
crimes = process(crimes)
X = crimes.drop(['category_ids'], axis=1)
# X = normalize(X, axis=0)
y = crimes['category_ids']

print "making net"
ds = ClassificationDataSet(35, 1, nb_classes=39)
for k in xrange(len(X)):
    ds.addSample(X.iloc[[k]], y.iloc[[k]])

print "cleaning data"
tstdata, trndata = ds.splitWithProportion(0.5)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "training"
hidden_layer = int((trndata.indim + trndata.outdim) / 2)
fnn = FeedForwardNetwork()
inLayer = LinearLayer(trndata.indim)
outLayer = SoftmaxLayer(trndata.outdim)
prev = None
fnn.addInputModule(inLayer)
features_train = features_pd.iloc[:train_count]
# print(features_train.describe())
features_test = features_pd.iloc[train_count:]
# print(features_test.describe())

x_train, x_test, y_train, y_test = train_test_split(features_train, labels, test_size=0.2)  #, random_state=1)
X = (x_train, x_test, y_train, y_test)

# print(cross_val_score(svc, features_train, labels, scoring="neg_mean_squared_error", cv=10).mean())
# print(cross_val_score(linear_svc, features_train, labels, scoring="neg_mean_squared_error", cv=10).mean())

dsTrain = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_train)
for row in range(rows):
    dsTrain.addSample(tuple(x_train.iloc[row]), y_train.iloc[row])
dsTrain._convertToOneOfMany()

dsTest = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_test)
for row in range(rows):
    dsTest.addSample(tuple(x_test.iloc[row]), y_test.iloc[row])
dsTest._convertToOneOfMany()

svc = None
fnn = None
if False:
    svc = svm.SVC(kernel='rbf', C=10, random_state=1, gamma=0.1, max_iter=1000)
    linear_svc = svm.LinearSVC(C=10, random_state=1, max_iter=100)
    pred = train_model(svc, X).predict(features_test)
    # print(pred, len(pred), pred.mean())
Y_n = [labels.index(e) for e in Y]
test_Y_n = [labels.index(e) for e in test_Y]

# normalize the features (mean-centering)
X_mean = np.mean(X, axis=0)
norm_X = [line - X_mean for line in np.array(X)]
norm_test_X = [line - X_mean for line in np.array(test_X)]

examples = []
for i in range(0, len(norm_X)):
    examples.append((norm_X[i], Y_n[i]))
shuffle(examples)

alldata = ClassificationDataSet(5400, 1, nb_classes=29)
for i in range(0, len(examples)):
    alldata.addSample(examples[i][0], [examples[i][1]])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn = buildNetwork(trndata.indim, 10, trndata.outdim, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
def trainet2(data, nhide=8, nhide1=8, epo=10, wd=.1, fn=''):
    alldata = data
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.5)

    tstdata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])

    trndata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])

    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()

    net = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim)
    hiddenLayer = TanhLayer(nhide)
    hiddenLayer1 = TanhLayer(nhide1)
    outLayer = LinearLayer(trndata.outdim)
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addModule(hiddenLayer1)
    net.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden = FullConnection(hiddenLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer1, outLayer)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()
    net.bias = True

    trainer = BackpropTrainer(net, dataset=trndata, verbose=True, weightdecay=wd, momentum=0.1)
    edata = []
    msedata = []
    for i in range(epo):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        tod = trainer.testOnData(verbose=False)
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult,
              " layers: ", nhide1,
              " N_tourn: ", alldata.indim / 2)
        edata.append([trnresult, tstresult])
        msedata.append([i, tod])

    with open(fn + ".dta", 'w') as fp:
        json.dump(edata, fp)
    with open(fn + ".mse", 'w') as fp:
        json.dump(msedata, fp)
    return net
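# Hypothetical invocation of trainet2 above; `alldata` is assumed to be a
# ClassificationDataSet (the function reads its nClasses attribute):
# net = trainet2(alldata, nhide=8, nhide1=8, epo=10, wd=0.1, fn='run1')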
        target=self['target'][rightIndicies].copy())
    return leftDs, rightDs

# Load iris data
irisData = datasets.load_iris()
dataFeatures = irisData.data
dataTargets = irisData.target

# Create data set object
dataSet = ClassificationDataSet(4, 1, nb_classes=3)  # 3 classes of iris

# Add data to our data set
for i in range(len(dataFeatures)):
    dataSet.addSample(np.ravel(dataFeatures[i]), dataTargets[i])

# Split data into train and test sets
trainingData, testData = splitWithProportion(dataSet, 0.7)

# Convert data classes to (1,0,0), (0,1,0), (0,0,1)
trainingData._convertToOneOfMany()
testData._convertToOneOfMany()

# Build neural network
neuralNetwork = buildNetwork(trainingData.indim, 7, trainingData.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(neuralNetwork, dataset=trainingData, momentum=0.01, learningrate=0.05, verbose=True)

# Train for 10 000 epochs and print error
trainer.trainEpochs(10000)
print('Error (test dataset): ', percentError(trainer.testOnClassData(dataset=testData), testData['class']))
def generate_data():
    index = [
        3471, 791, 458, 3068, 1542, 524, 278, 526, 5769, 3129, 5440, 166, 4577,
        5714, 1692, 546, 402, 2552, 4129, 1894, 4743, 1809, 630, 208, 818, 6034,
        3988, 3981, 4580, 134, 1289, 5712, 4723, 4961, 3417, 2630, 994, 689, 5770,
        3122, 4823, 4508, 2696, 5566, 2136, 4217, 1503, 1448, 3117, 1161, 5385, 6095,
        2197, 325, 2310, 4990, 2009, 5880, 3900, 1715, 1573, 1488, 1125, 3533, 3004,
        55, 4424, 3077, 499, 144, 5976, 4643, 3219, 2328, 1770, 1510, 770, 107,
        1625, 4684, 4544, 4470, 3684, 3607, 942, 671, 5796, 3773, 2204, 2083, 345,
        3942, 6113, 6112, 6111, 6110, 6109, 6108, 6107, 6106, 6105, 6104, 6103, 6102,
        6101, 6100, 6099, 6098, 6097, 6096, 6094, 6093, 6092, 6091, 6090, 6089, 6088,
        6087, 6086, 6085, 6084, 6083, 6082, 6081, 6080, 6079, 6078, 6077, 6076, 6075,
        6074, 6073, 6072, 6071, 6070, 6069, 6068, 6067, 6066, 6065, 6064, 6063, 6062,
        6061, 6060, 6059, 6058, 6057, 6056, 6055, 6054, 6053, 6052, 6051, 6050, 6049,
        6048, 6047, 6046, 6045, 6044, 6043, 6042, 6041, 6040, 6039, 6038, 6037, 6036,
        6035, 6033, 6032, 6031, 6030, 6029, 6028, 6027, 6026, 6025, 6024, 6023, 6022,
        6021, 6020, 6019, 6018, 6017, 6016, 6015, 6014, 6013, 6012, 6011, 6010, 6009,
        6008, 6007, 6006, 6005, 6004, 6003, 6002, 6001, 6000, 5999, 5998, 5997, 5996,
        5995, 5994, 5993, 5992, 5991, 5990, 5989, 5988, 5987, 5986, 5985, 5984, 5983,
        5982, 5981, 5980, 5979, 5978, 5977, 5975, 5974, 5973, 5972, 5971, 5970, 5969,
        5968, 5967, 5966, 5965, 5964, 5963, 5962, 5961, 5960, 5959, 5958, 5957, 5956,
        5955, 5954, 5953, 5952, 5951, 5950, 5949, 5948, 5947, 5946, 5945, 5944, 5943,
        5942, 5941, 5940, 5939, 5938, 5937, 5936, 5935, 5934, 5933, 5932, 5931, 5930,
        5929, 5928, 5927, 5926, 5925, 5924, 5923, 5922, 5921, 5920, 5919, 5918, 5917,
        5916, 5915, 5914, 5913, 5912, 5911, 5910, 5909, 5908, 5907, 5906, 5905, 5904,
        5903, 5902, 5901, 5900, 5899, 5898, 5897, 5896, 5895, 5894, 5893, 5892, 5891,
        5890, 5889, 5888, 5887, 5886, 5885, 5884, 5883, 5882, 5881, 5879, 5878, 5877,
        5876, 5875, 5874, 5873, 5872, 5871, 5870, 5869, 5868, 5867, 5866, 5865, 5864,
        5863, 5862, 5861, 5860, 5859, 5858, 5857, 5856, 5855, 5854, 5853, 5852, 5851,
        5850, 5849, 5848, 5847, 5846, 5845, 5844, 5843, 5842, 5841, 5840, 5839, 5838,
        5837, 5836, 5835, 5834, 5833, 5832, 5831, 5830, 5829, 5828, 5827, 5826, 5825,
        5824, 5823, 5822, 5821, 5820, 5819, 5818, 5817, 5816, 5815, 5814, 5813, 5812,
        5811, 5810, 5809, 5808, 5807, 5806, 5805, 5804, 5803, 5802, 5801, 5800, 5799,
        5798, 5797, 5795, 5794, 5793, 5792, 5791, 5790, 5789, 5788, 5787, 5786, 5785,
        5784, 5783, 5782, 5781, 5780, 5779, 5778, 5777, 5776, 5775, 5774, 5773, 5772,
        5771, 5768, 5767, 5766, 5765, 5764, 5763, 5762, 5761, 5760, 5759, 5758, 5757,
        5756, 5755, 5754, 5753, 5752, 5751, 5750, 5749, 5748, 5747, 5746, 5745, 5744,
        5743, 5742, 5741, 5740, 5739, 5738, 5737, 5736, 5735, 5734, 5733, 5732, 5731,
        5730, 5729, 5728, 5727, 5726, 5725, 5724, 5723, 5722, 5721, 5720, 5719, 5718,
        5717, 5716, 5715, 5713, 5711, 5710, 5709, 5708, 5707, 5706, 5705, 5704, 5703,
        5702, 5701, 5700, 5699, 5698, 5697
    ]
    INPUT_FEATURES = 500
    CLASSES = 9
    #train_text,train_classfi = getTargetData("Breast_train.data")
    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData("nci60_train_m.txt")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "1":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "3":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "4":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "5":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "6":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "7":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "8":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "9":
            klass = 8
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata, 'index': index}
def generate_data():
    INPUT_FEATURES = 16063
    CLASSES = 15  # NB: only the 14 GCM tumour types below are ever mapped

    train_text, train_classfi = getTargetData("GCM_train.data")

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # Map each tumour-type label to a class index 0..13
    # (equivalent to the original if/elif chain)
    class_labels = ["Breast", "Prostate", "Lung", "Colorectal", "Lymphoma",
                    "Bladder", "Melanoma", "Uterus", "Leukemia", "Renal",
                    "Pancreas", "Ovary", "Mesothelioma", "CNS"]
    klass_of = dict((label, k) for k, label in enumerate(class_labels))
    for i in range(len(train_text)):
        features = train_text[i]
        label = train_classfi[i]
        if label in klass_of:
            alldata.addSample(features, klass_of[label])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata}
def generate_Testdata(index):
    INPUT_FEATURES = 500
    CLASSES = 9

    # Load the NCI60 test split and select the same features as the training set
    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("nci60_test_m.txt")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        label = train_classfi[i]
        # Labels "1".."9" map to classes 0..8 (equivalent to the original if/elif chain)
        if label in [str(k) for k in range(1, CLASSES + 1)]:
            alldata.addSample(features, int(label) - 1)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1,
            'd': alldata, 'index': index}
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_outputs=None,
                 classes=None, class_labels=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs, num_outputs,
                                              nb_classes=classes,
                                              class_labels=class_labels)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        # splitWithProportion(0.25) puts 25% of the samples into the first set
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        # Simple feed-forward network with one hidden layer of 7 units
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim,
                                           outclass=SoftmaxLayer)

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                                          momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25, maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test),
                                        self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train),
                                         self.train['class'])
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                                  momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        self.cross_validation_result = validator.validate()
        logger.info('cross val result: {result}'.format(
            result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
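A hypothetical usage sketch for this class; the dimensions, labels, file name, and the extract_correlograms() generator are illustrative assumptions, not part of the original module:

# Hypothetical driver code; assumes samples come from some feature extractor.
workflow = NeuralNetwork(purpose='train', num_inputs=64, num_outputs=1,
                         classes=10, class_labels=[str(d) for d in range(10)])
for matrix, target, path in extract_correlograms():  # hypothetical generator
    workflow.add_sample(correlogram_matrix=matrix, target=target, sample_path=path)
workflow.process()
NeuralNetwork.save_network_to_xml(net=workflow.neural_network,
                                  file_name='neural_net.xml')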
from pybrain.datasets import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

# To have a nice dataset for visualization, we produce a set of points in 2D
# belonging to three different classes.
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])

# Randomly split the dataset into 75% training and 25% test data sets. Of course,
# we could also have created two different datasets to begin with.
tstdata, trndata = alldata.splitWithProportion(0.25)

# For neural network classification, it is highly advisable to encode classes
# with one output neuron per class. Note that this operation duplicates the
# original targets and stores them in an (integer) field named 'class'.
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

# Now build a feed-forward network with 5 hidden units, using the shortcut
# buildNetwork(). The input and output layer sizes must match the dataset's
# input and target dimensions; see the sketch below.
# net = buildNetwork(2, 5, 3)
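The excerpt stops just before the network is built. The PyBrain classification tutorial that this snippet follows continues roughly as below; treat it as a sketch of the next steps rather than the original file's code:

from pybrain.utilities import percentError

# Softmax output layer for classification; 5 hidden units as the comment says
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                          verbose=True, weightdecay=0.01)
for i in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
    print "epoch: %4d" % trainer.totalepochs, \
          " train error: %5.2f%%" % trnresult, \
          " test error: %5.2f%%" % tstresult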
from sklearn import datasets
from matplotlib import pyplot as plt
from numpy import ravel
import sys
from pybrain.datasets import ClassificationDataSet

digits = datasets.load_digits()
X, y = digits.data, digits.target
print(X[0].shape)

# One target column holding the class index; nb_classes expands it later.
# (Passing a target dimension of 10 here, as the source did, silently
# broadcasts the scalar class label over all ten target columns.)
ds = ClassificationDataSet(64, 1, nb_classes=10)
test = ClassificationDataSet(64, 1, nb_classes=10)
training = ClassificationDataSet(64, 1, nb_classes=10)

for k in xrange(len(X)):
    ds.addSample(ravel(X[k]), y[k])

# splitWithProportion returns plain SupervisedDataSets, so copy the samples
# back into ClassificationDataSets
test_t, training_t = ds.splitWithProportion(0.25)
for k in xrange(0, test_t.getLength()):
    test.addSample(test_t.getSample(k)[0], test_t.getSample(k)[1])
for k in xrange(0, training_t.getLength()):
    training.addSample(training_t.getSample(k)[0], training_t.getSample(k)[1])

print(training.getLength())
print(test.getLength())
print(test.indim)
print(test.outdim)
print(training.indim)
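This copy-back loop (split, then re-add every sample) reappears in several snippets below; factored out, a small helper along these lines would do. The name as_classification is ours, not part of PyBrain:

def as_classification(ds, indim, nb_classes, class_labels=None):
    """Copy a SupervisedDataSet returned by splitWithProportion back into a
    ClassificationDataSet so that _convertToOneOfMany() can be used."""
    out = ClassificationDataSet(indim, 1, nb_classes=nb_classes,
                                class_labels=class_labels)
    for k in xrange(ds.getLength()):
        inp, target = ds.getSample(k)
        out.addSample(inp, target)
    return out

# e.g. training = as_classification(training_t, 64, 10)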
    NetworkWriter.writeToFile(nn, filename)

########################################################################################################################

# Seed the random number generator to make the calculation deterministic (just good practice)
np.random.seed(1)

olivetti = datasets.fetch_olivetti_faces()
X, y = olivetti.data, olivetti.target
print "data shape of faces:", X.shape

# Each row of olivetti.data is already a flattened 64x64 face (4096 values),
# so the rows can be fed straight into our NN classification dataset
ds = ClassificationDataSet(4096, 1, nb_classes=40)
for k in xrange(len(X)):
    ds.addSample(X[k], y[k])

# Split the data into 75% training and 25% test data
tstdata, trndata = ds.splitWithProportion(0.25)

# Convert the single class column into 40 binary outputs
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# Check the data inside the neural network:
print trndata['input'], trndata['target'], tstdata.indim, tstdata.outdim

# Now that all data is loaded, build the network and the backpropagation trainer:
#fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer)
fnn = create_or_read_from_file(trndata)
trainer = BackpropTrainer(fnn,
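create_or_read_from_file is not defined in this excerpt; judging by the NetworkWriter.writeToFile call at the top, it caches a trained network on disk. A plausible sketch, where the function body, file name, and hidden-layer size are assumptions:

import os
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer

def create_or_read_from_file(trndata, filename='olivetti_net.xml'):
    # Reuse a previously saved network if one exists, otherwise build a new one
    if os.path.exists(filename):
        return NetworkReader.readFrom(filename)
    return buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer)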
def generate_data():
    index = [
        9154, 5123, 2407, 680, 548, 8016, 15755, 9861, 461, 5552, 6834, 6268,
        14112, 15285, 13065, 8838, 2962, 6581, 4025, 14928, 10521, 1413, 3587, 3537,
        13462, 9809, 4128, 15806, 4884, 2084, 7818, 8294, 12308, 8789, 5328, 5817,
        7663, 6299, 15295, 3547, 1673, 5940, 6085, 6368, 6006, 5520, 14228, 8608,
        7822, 3237, 10927, 12268, 2852, 6903, 13001, 10775, 4852, 14487, 10885, 14948,
        15239, 8787, 6886, 15720, 13436, 4102, 7832, 5071, 11062, 15004, 14888, 12560,
        4381, 14283, 6892, 14753, 10132, 6937, 2393, 465, 11791, 8533, 2174, 6739,
        4316, 251, 11438, 10288, 6658, 6439, 6711, 5173, 11590, 1452, 524, 15677,
        13742, 11881, 9299, 7499, 7068, 11457, 11128, 4936, 1634, 14692, 13352, 11896,
        11895, 11494, 9704, 6878, 10112, 10027, 10207, 6946, 6604, 5563, 3590, 2817,
        2661, 9667, 9609, 8368, 7538, 6830, 1909, 1385, 15043, 14006, 11050, 10743,
        10306, 9574, 9546, 9267, 9232, 8546, 8452, 8027, 7465, 5453, 1903, 1747,
        1367, 15496, 14231, 13894, 12340, 11433, 11118, 9223, 8369, 8017, 7324, 6737,
        5047, 4635, 4631, 3685, 3418, 3215, 1395, 835, 690, 15808, 15210, 13829,
        13798, 13303, 13220, 13078, 12416, 12407, 12082, 11940, 11266, 9794, 9643, 8825,
        8600, 8446, 7892, 6972, 6728, 6559, 5759, 5091, 4640, 4209, 3214, 1994,
        1599, 1447, 1082, 15881, 15810, 15586, 15564, 15150
    ]
    INPUT_FEATURES = 200
    CLASSES = 15  # NB: only the 14 GCM tumour types below are ever mapped

    # Load the GCM training split and keep only the selected features
    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("GCM_train.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # Map each tumour-type label to a class index (equivalent to the original
    # if/elif chain; its duplicate "Colorectal" branch, klass = 14, was
    # unreachable and is dropped here)
    class_labels = ["Breast", "Prostate", "Lung", "Colorectal", "Lymphoma",
                    "Bladder", "Melanoma", "Uterus", "Leukemia", "Renal",
                    "Pancreas", "Ovary", "Mesothelioma", "CNS"]
    klass_of = dict((label, k) for k, label in enumerate(class_labels))
    for i in range(len(train_text)):
        features = train_text[i]
        label = train_classfi[i]
        if label in klass_of:
            alldata.addSample(features, klass_of[label])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1,
            'd': alldata, 'index': index}
def run(args):
    manual_validated_file = args.manual_validated_file  # e.g. 'JHotDraw54b1_clones.xml.clones2'
    save_target_name = args.save_target_name            # e.g. 'newTrainedModel'
    print 'Training the Model. Please wait ...'

    manual_validation_data = pd.read_csv(
        'manual_validator/input_clone_pairs/' + manual_validated_file)

    inputDim = 6
    alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
    txlHelper = TXLHelper()

    for i in range(0, len(manual_validation_data)):
        # Columns 3 and 4 hold "path start end" triples for the two clone fragments
        cloneFragment_1_path, cloneFragment_1_start, cloneFragment_1_end = \
            manual_validation_data.iloc[i][3].split()[0], \
            manual_validation_data.iloc[i][3].split()[1], \
            manual_validation_data.iloc[i][3].split()[2]
        cloneFragment_2_path, cloneFragment_2_start, cloneFragment_2_end = \
            manual_validation_data.iloc[i][4].split()[0], \
            manual_validation_data.iloc[i][4].split()[1], \
            manual_validation_data.iloc[i][4].split()[2]
        cloneFragment_1 = read_file_in_line_range(
            filePath='manual_validator/input_clone_pairs/' + cloneFragment_1_path,
            startLine=cloneFragment_1_start, endLine=cloneFragment_1_end)
        cloneFragment_2 = read_file_in_line_range(
            filePath='manual_validator/input_clone_pairs/' + cloneFragment_2_path,
            startLine=cloneFragment_2_start, endLine=cloneFragment_2_end)

        type1sim_by_line, type2sim_by_line, type3sim_by_line = \
            txlHelper.app_code_clone_similaritiesNormalizedByLine(
                cloneFragment_1, cloneFragment_2, 'java')
        type1sim_by_token, type2sim_by_token, type3sim_by_token = \
            txlHelper.app_code_clone_similaritiesNormalizedByToken(
                cloneFragment_1, cloneFragment_2, 'java')

        label = 1 if manual_validation_data.iloc[i][2] == 'true' else 0

        # Six similarity features: the three by-line scores, then the three
        # by-token scores (the source repeated type1sim_by_token in the first
        # slot, which looks like a typo for type1sim_by_line)
        input = np.array([type1sim_by_line, type2sim_by_line, type3sim_by_line,
                          type1sim_by_token, type2sim_by_token, type3sim_by_token])
        alldata.addSample(input, int(label))

    # Alternative (commented out in the source): load precomputed features from CSV
    # def load_training_dataSet(fileName):
    #     data = pd.read_csv(fileName, sep=',', header=None)
    #     return data
    #
    # myclones_data = load_training_dataSet('Datasets/new_dataset_with_new_features.csv').values
    # alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
    # for n in xrange(len(myclones_data)):
    #     input = np.array([myclones_data[n][11], myclones_data[n][17], myclones_data[n][12],
    #                       myclones_data[n][15], myclones_data[n][18], myclones_data[n][16]])
    #     alldata.addSample(input, int(myclones_data[n][35]))

    tstdata, trndata = alldata.splitWithProportion(0.25)

    # splitWithProportion returns SupervisedDataSets; copy the samples back into
    # ClassificationDataSets so _convertToOneOfMany() is available
    tstdata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
    for n in xrange(0, tstdata.getLength()):
        tstdata_new.addSample(tstdata.getSample(n)[0], tstdata.getSample(n)[1])
    trndata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
    for n in xrange(0, trndata.getLength()):
        trndata_new.addSample(trndata.getSample(n)[0], trndata.getSample(n)[1])
    trndata = trndata_new
    tstdata = tstdata_new

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    fnn = buildNetwork(trndata.indim, 107, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                              learningrate=0.05, verbose=True, weightdecay=0.001)

    # (Commented out in the source: a mesh grid for decision-surface plotting)
    """
    ticks = arange(-3., 6., 0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(7, 1, nb_classes=2)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
    """

    for i in range(1):
        trainer.trainEpochs(10)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])
        #print "epoch: %4d" % trainer.totalepochs, \
        #      " train error: %5.2f%%" % trnresult, \
        #      " test error: %5.2f%%" % tstresult

    print "Saving the trained Model at : ", 'pybrain/' + save_target_name

    # Save the trained network; pickle files should be opened in binary mode
    import pickle
    fileObject = open('pybrain/' + save_target_name, 'wb')
    pickle.dump(fnn, fileObject)
    fileObject.close()

    # Reloading and querying the model later (commented out in the source):
    # fileObject = open('trainedNetwork79', 'rb')
    # loaded_fnn = pickle.load(fileObject)
    # print loaded_fnn.activate([0.2, 0.5, 0.6, 0.1, 0.3, 0.7])
    # print fnn.activate([0.2, 0.5, 0.6, 0.1, 0.3, 0.7])
    #
    # out = fnn.activateOnDataset(griddata)
    # out = out.argmax(axis=1)  # the highest output activation gives the class
    # out = out.reshape(X.shape)
# Imports
import numpy as np
from scipy import stats
from pybrain.datasets import ClassificationDataSet

# Data and outputs
datain = np.loadtxt(open("beerdata.csv", "rb"), delimiter=",", skiprows=0)
y = datain[:, 0] - 1          # 178x1 vector of classifications
X = datain[:, 1:]             # 178x13 matrix of data points
X = stats.zscore(X, axis=0)   # normalize the data by feature
m = X.shape[0]                # number of data points

# Build a ClassificationDataSet object and enter all of the data and
# classifications from X and y.
data = ClassificationDataSet(13)
for i in range(m):
    data.addSample(X[i, :], int(y[i]))
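The snippet stops once the samples are loaded. A sketch of the usual next steps, under two assumptions: the data has three classes (in which case the dataset should really be built as ClassificationDataSet(13, nb_classes=3) for the one-hot conversion to size its targets), and the layer size and epoch count are purely illustrative:

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

# On newer PyBrain, splitWithProportion returns SupervisedDataSets; copy the
# halves back into ClassificationDataSets (as in the digits example above)
# if _convertToOneOfMany is unavailable.
tstdata, trndata = data.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

net = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=trndata, verbose=True)
trainer.trainEpochs(50)
print('test error: %.2f%%' % percentError(
    trainer.testOnClassData(dataset=tstdata), tstdata['class']))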
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 15  # NB: only the 14 GCM tumour types below are ever mapped

    # Load the GCM test split and select the same features as the training set
    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("GCM_test.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # Same label-to-class mapping as in generate_data(); the duplicate
    # "Colorectal" branch (klass = 14) in the source chain was unreachable
    # and is dropped here
    class_labels = ["Breast", "Prostate", "Lung", "Colorectal", "Lymphoma",
                    "Bladder", "Melanoma", "Uterus", "Leukemia", "Renal",
                    "Pancreas", "Ovary", "Mesothelioma", "CNS"]
    klass_of = dict((label, k) for k, label in enumerate(class_labels))
    for i in range(len(train_text)):
        features = train_text[i]
        label = train_classfi[i]
        if label in klass_of:
            alldata.addSample(features, klass_of[label])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1,
            'd': alldata, 'index': index}
images_path = sys.argv[1] if len(sys.argv) == 2 else "data2/**/*.jpg"
fitness.setData(images_path)
for input_data, output_data, image_path in fitness.getNextData(
        recalc=True, return_image_path=True, use_images_without_output=use_net):
    total += 1
    image = cv2.imread(image_path)
    if use_net:
        if classification:
            # Wrap the current sample in a one-row ClassificationDataSet so the
            # net can be run via activateOnDataset ('features' in the source is
            # presumably a typo for input_data)
            ds = ClassificationDataSet(len(input_data), nb_classes=2,
                                       class_labels=['aceptado', 'despunte'])
            ds.addSample(input_data, [0])  # dummy target; only the activation is used
            ds._convertToOneOfMany()
            out = net.activateOnDataset(ds)
            out_class = out.argmax(axis=1)  # the highest output activation gives the class
        else:
            ds = SupervisedDataSet(len(input_data), net.indim)
            ds.addSample(input_data, [0] * net.indim)
            out = net.activateOnDataset(ds)[0]
            print out
    debug_image = []
    if output_data is not None:
        debug_image.extend(feature.debug_feature(output_data, image_path))
    if use_net:
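For a single feature vector, the one-sample dataset detour above is not strictly needed; PyBrain networks expose activate(), which yields the same activation vector:

# Equivalent single-sample classification without the dataset detour
out = net.activate(input_data)   # activation vector, one value per class
out_class = out.argmax()         # index of the winning class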
# Breast-cancer data: 9 input features, one target column, two classes
# (the constructor call is cut off in the source; its head is inferred from
# the identical test_data/training_data constructors below)
all_data = ClassificationDataSet(9, 1,
                                 nb_classes=2,
                                 class_labels=['Benign', 'Malignant'])
all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)

test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)

# Copy the split halves back into ClassificationDataSets (splitWithProportion
# returns plain SupervisedDataSets)
test_data = ClassificationDataSet(9, 1, nb_classes=2,
                                  class_labels=['Benign', 'Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(test_data_temp.getSample(n)[0],
                        test_data_temp.getSample(n)[1])

training_data = ClassificationDataSet(9, 1, nb_classes=2,
                                      class_labels=['Benign', 'Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(training_data_temp.getSample(n)[0],
                            training_data_temp.getSample(n)[1])

training_data._convertToOneOfMany()
test_data._convertToOneOfMany()

#********************End of Data Preparation***************************
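The data-preparation marker implies a training stage follows. A minimal continuation consistent with the other snippets in this collection; the hidden-layer size, epoch count, and hyperparameters are illustrative assumptions:

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

# Build, train, and score a softmax classifier on the prepared data
net = buildNetwork(training_data.indim, 9, training_data.outdim,
                   outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=training_data, momentum=0.1,
                          verbose=True, weightdecay=0.01)
trainer.trainEpochs(50)
print 'test error: %5.2f%%' % percentError(
    trainer.testOnClassData(dataset=test_data), test_data['class'])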