def makeMnistDataSets(path):
    """Return a pair consisting of two datasets, the first being the training
    and the second being the test dataset."""
    # test = SupervisedDataSet(28 * 28, 10)
    test = ClassificationDataSet(28 * 28, 10)
    test_image_file = os.path.join(path, 't10k-images-idx3-ubyte')
    test_label_file = os.path.join(path, 't10k-labels-idx1-ubyte')
    test_images = images(test_image_file)
    test_labels = (flaggedArrayByIndex(l, 10) for l in labels(test_label_file))

    for image, label in zip(test_images, test_labels):
        test.appendLinked(image, label)
        # test.addSample(image, label)

    # train = SupervisedDataSet(28 * 28, 10)
    train = ClassificationDataSet(28 * 28, 10)
    train_image_file = os.path.join(path, 'train-images-idx3-ubyte')
    train_label_file = os.path.join(path, 'train-labels-idx1-ubyte')
    train_images = images(train_image_file)
    train_labels = (flaggedArrayByIndex(l, 10) for l in labels(train_label_file))

    for image, label in zip(train_images, train_labels):
        train.appendLinked(image, label)
        # train.addSample(image, label)

    return train, test
def build_dataset(data_pair):
    inputs, classes = data_pair
    ds = ClassificationDataSet(256)
    data = zip(inputs, classes)
    for (inp, c) in data:
        ds.appendLinked(inp, [c])
    return ds
def classifer(labels, data):
    """ data in format (value, label) """
    clsff = ClassificationDataSet(2, class_labels=labels)
    for d in data:
        clsff.appendLinked(d[0], d[1])
    clsff.calculateStatistics()
def getPybrainDataSet(source='Rachelle'):
    first = False  # True
    qualities, combinations = cp.getCombinations()
    moods = combinations.keys()
    ds = None
    l = 0
    for mood in moods:
        if mood == 'neutral':
            continue
        for typeNum in range(1, 21):
            for take in range(1, 10):
                fileName = 'recordings/' + source + '/' + mood + '/' + \
                    str(typeNum) + '_' + str(take) + '.skl'
                try:
                    data, featuresNames = ge.getFeatureVec(fileName, first)
                    first = False
                except IOError:
                    continue
                if ds is None:  # initialization
                    ds = ClassificationDataSet(len(data), len(qualities))
                output = np.zeros((len(qualities)))
                for q in combinations[mood][typeNum]:
                    output[qualities.index(q)] = 1
                ds.appendLinked(data, output)
                l += sum(output)
    return ds, featuresNames
def getBoardImage(img):
    '''
    Runs an image through processing and a neural network to decode digits

    img: an openCV image object

    returns:
    pil_im: a PIL image object with the puzzle isolated, cropped and straightened
    boardString: string representing the digits and spaces of a Sudoku board
                 (left to right, top to bottom)
    '''
    # Process image and extract digits
    pil_im, numbers, parsed, missed = process(img, False)

    if pil_im is None:
        return None, None

    net = NetworkReader.readFrom(os.path.dirname(os.path.abspath(__file__)) + '/network.xml')

    boardString = ''
    for number in numbers:
        if number is None:
            boardString += ' '
        else:
            data = ClassificationDataSet(400, nb_classes=9,
                                         class_labels=['1','2','3','4','5','6','7','8','9'])
            data.appendLinked(number.ravel(), [0])
            boardString += str(net.activateOnDataset(data).argmax(axis=1)[0] + 1)
    return pil_im, boardString
def import_dataset(path, shapes, used_for, samples_nbr):
    ds = ClassificationDataSet(4, nb_classes=3)
    for shape in sorted(shapes):
        for i in range(samples_nbr):
            image = imread(path + used_for + "/" + shape + str(i + 1) + ".png",
                           as_grey=True, plugin=None, flatten=None)
            image_inputs = image_to_inputs(image)
            ds.appendLinked(image_inputs, shapes[shape])
    return ds
def create_data_set(file_name):
    raw_data = open(file_name).readlines()
    data_set = ClassificationDataSet(64, nb_classes=10,
                                     class_labels=['0', '1', '2', '3', '4',
                                                   '5', '6', '7', '8', '9'])
    for line in raw_data:
        # Get raw line into a list of integers
        line = map(lambda x: int(x), line.strip().split(','))
        data_set.appendLinked(line[:-1], line[-1])
    return data_set
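A short follow-on sketch, not part of the original example: assuming a comma-separated digit file of 64 pixel values plus a label as read above (the file name 'optdigits.tra' and the hidden-layer size are placeholders), this shows how the returned dataset is typically one-hot encoded and trained in the other snippets on this page.

# Hypothetical usage sketch for create_data_set (file name and layer size are assumptions)
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer

ds = create_data_set('optdigits.tra')
ds._convertToOneOfMany(bounds=[0, 1])   # 10 binary target columns, one per digit
net = buildNetwork(ds.indim, 32, ds.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=ds, verbose=True)
trainer.trainEpochs(10)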
def classificationDataSet(subjects=['a2','b','c1','c2'], segClass=0, db=None,
                          seg_width=10, usePCA=True, n_components=5,
                          isTrainingData=False):
    if not db:
        db = gyroWalkingData()
    if usePCA:
        DS = ClassificationDataSet(n_components*3, nb_classes=2)
    else:
        DS = ClassificationDataSet(21*3, nb_classes=2)

    for subject in subjects:
        # Initialise data
        if usePCA:
            raw = db.pca_dict(n_components=n_components, whiten=False)[subject]
        else:
            raw = db.data[subject][:,2:]
        gradients, standardDeviations = summaryStatistics(raw, std_window=seg_width)

        # Initialise segments
        if 0 <= segClass < 4:
            segs = [s for s, c in db.manual_gait_segments[subject] if c == segClass]
        else:
            segs = db.segments[subject]

        # Add data
        for i in range(0, len(raw)):
            """
            # Look for segments in window, including those of other classes
            hasSeg = 0
            hasOtherSeg = False
            for j in range(seg_width):
                if i+j in segs:
                    hasSeg = 1
                else:
                    if i+j in zip(*db.manual_gait_segments[subject])[0]:
                        hasOtherSeg = True
            if hasOtherSeg:
                hasSeg = 0
            # Add segments to classifier, duplicating rare classes if it is training data
            for j in range(seg_width):
                if i+j < len(raw):
                    DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
                    if isTrainingData and (hasSeg or hasOtherSeg):
                        for i in range(0):
                            DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
            """
            hasSeg = 0
            if i in segs:
                hasSeg = 1
            DS.appendLinked(np.concatenate([raw[i], gradients[i], standardDeviations[i]]), [hasSeg])

    DS._convertToOneOfMany()
    if isTrainingData:
        DS = balanceClassRatios(DS)
    return DS
def conv2DS(Xv, yv=None):
    if yv is None:
        yv = np.asmatrix(np.ones((Xv.shape[0], 1)))
        for j in range(len(classNames)):
            yv[j] = j
    C = len(unique(yv.flatten().tolist()[0]))
    DS = ClassificationDataSet(M, 1, nb_classes=C)
    for i in range(Xv.shape[0]):
        DS.appendLinked(Xv[i, :].tolist()[0], [yv[i].A[0][0]])
    DS._convertToOneOfMany()
    return DS
def __get_classification_dataset__(self, data):
    DS = ClassificationDataSet(self.__num_features__, class_labels=['neg', 'pos'])
    for curr_data_idx in range(data.shape[0]):
        curr_data = data[curr_data_idx, :-1]
        print 'curr_data', curr_data
        curr_label = data[curr_data_idx, -1]
        print 'curr_label', curr_label
        DS.appendLinked(curr_data, [curr_label])
    #
    return DS
def getSeparateDataSets(testSize=0.2):
    trnDs = ClassificationDataSet(len(feats), nb_classes=len(classes))
    tstDs = SupervisedDataSet(len(feats), 1)
    for c in classes:
        with codecs.open(os.path.join(data_root, c + ".txt"), 'r', 'utf8') as f:
            lines = f.readlines()
            breakpoint = (1.0 - testSize) * len(lines)
            for i in range(len(lines)):
                r = Record("11", lines[i], c, "")
                if i < breakpoint:
                    trnDs.appendLinked(r.features(), [r.class_idx()])
                else:
                    tstDs.appendLinked(r.features(), [r.class_idx()])
    trnDs._convertToOneOfMany([0, 1])
    return trnDs, tstDs
def conv2DS(Xv, yv=None, labels=string.ascii_uppercase):
    N, M = Xv.shape
    if yv is None:
        yv = np.asmatrix(np.ones((Xv.shape[0], 1)))
        for j in range(len(classNames)):
            yv[j] = j
    le = preprocessing.LabelEncoder()
    y_asnumbers = le.fit_transform(np.ravel(yv))
    C = len(np.unique(np.ravel(yv)))
    DS = ClassificationDataSet(M, 1, nb_classes=C, class_labels=labels)
    for i in range(Xv.shape[0]):
        DS.appendLinked(Xv[i, :], y_asnumbers[i])
    return DS
def __prepareTrainingData(self, places, num_of_places):
    alldata = ClassificationDataSet(2, 1, nb_classes=self.num_of_places)
    previous_feature_vector = None
    previous_place = None
    counter = 0
    for location_event in places:
        if location_event.place != None:
            current_timestamp = location_event.timestamp
            new_feature_vector = self.__prepare_features(location_event.place, current_timestamp)
            new_place = self.__prepare_place(location_event.place)
            #if previous_feature_vector!=None and previous_place!=None and location_event.place.name!=previous_place.name:
            if previous_feature_vector != None:
                counter += 1
                if location_event.place.name == "2":
                    print previous_feature_vector
                    print location_event.place.name
                for i in range(1):
                    alldata.appendLinked(previous_feature_vector, [new_place])
            previous_feature_vector = new_feature_vector
            previous_place = location_event.place
            self.last_visit_map[location_event.place] = current_timestamp

    previous_feature_vector = None
    previous_place = None
    probiability_of_static = float(counter) / float(len(places))
    probiability_of_static = 0.5
    for location_event in places:
        if location_event.place != None:
            current_timestamp = location_event.timestamp
            new_feature_vector = self.__prepare_features(location_event.place, current_timestamp)
            new_place = self.__prepare_place(location_event.place)
            rand = random.random()
            if previous_feature_vector != None and rand <= probiability_of_static:
                counter += 1
                if location_event.place.name == "1":
                    print new_feature_vector
                    print location_event.place.name
                for i in range(1):
                    alldata.appendLinked(previous_feature_vector, [new_place])
            previous_feature_vector = new_feature_vector
            previous_place = new_place
            self.last_visit_map[location_event.place] = current_timestamp
    return alldata
def init_classifier(self, hidden_units=20):
    data = ClassificationDataSet(len(self.channels), nb_classes=5)
    # Prepare the dataset
    for i in range(len(self.classification_proc)):
        data.appendLinked(self.y_proc[i], self.classification_proc[i])
    # Make global for test purposes
    self.data = data
    # Prepare training and test data, 75% - 25% proportion
    self.testdata, self.traindata = data.splitWithProportion(0.25)
    #self.traindata._convertToOneOfMany()
    #self.testdata._convertToOneOfMany()
    # CHECK the number of hidden units
    fnn = buildNetwork(self.traindata.indim, hidden_units, self.traindata.outdim)
    # CHECK meaning of the parameters
    trainer = BackpropTrainer(fnn, dataset=self.traindata, momentum=0,
                              verbose=True, weightdecay=0.01)
    return fnn, trainer, data
def train(training_data):
    training_set = ClassificationDataSet(len(feats), nb_classes=len(classes))
    for inst in training_data:
        training_set.appendLinked(inst.features(), [inst.class_idx()])
    training_set._convertToOneOfMany([0, 1])
    net_placeholder[0] = buildNetwork(
        training_set.indim,
        int((training_set.indim + training_set.outdim) / 2),
        training_set.outdim,
        bias=True,
        hiddenclass=TanhLayer,
        outclass=SoftmaxLayer
    )
    trainer = BackpropTrainer(
        net_placeholder[0], training_set,
        momentum=0.75, verbose=False, learningrate=0.05
    )
    trainer.trainUntilConvergence(maxEpochs=100, validationProportion=0.1)
def fnn():
    data = orange.ExampleTable("D:\\Back-up-THICK_on_Vista\\Orange\\W1BIN.tab")  # input_dict['data']
    addMetaID(data)
    n_attrs = len(data.domain.attributes)
    classes = list(data.domain.classVar.values)
    pbdata = ClassificationDataSet(n_attrs, class_labels=classes)
    for ex in data:
        pbdata.appendLinked([x.value for x in list(ex)[:n_attrs]],
                            [classes.index(ex.getclass().value)])
    tstdata, trndata = pbdata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
def build_net(self):
    if os.path.exists(self.NET_FILE):
        return NetworkReader.readFrom(self.NET_FILE)
    ds = ClassificationDataSet(len(feats), nb_classes=len(classes))
    for c in classes:
        print c
        with codecs.open(os.path.join(self.data_root, c + ".txt"), 'r', 'utf8') as f:
            for line in f:
                r = Record("11", line, c, "")
                ds.appendLinked(r.features(), [r.class_idx()])
    ds._convertToOneOfMany([0, 1])
    net = buildNetwork(ds.indim, int((ds.indim + ds.outdim) / 2), ds.outdim,
                       bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, ds, momentum=0.75, verbose=True)
    trainer.trainUntilConvergence(maxEpochs=300)
    NetworkWriter.writeToFile(net, self.NET_FILE)
    return net
def bagging_classifier(self, trainInstances, testInstances, L):
    """Train and test a bagging classifier for the neural network.

    (1) generate self.m new training sets, each with L instances drawn from
        trainInstances with replacement;
    (2) train self.m neural networks on the self.m training sets;
    (3) majority vote

    Precondition: dimensions of trainInstances, testInstances must match self.fnn

    :param trainInstances: collection of training examples
    :type trainInstances: ClassificationDataSet
    :param testInstances: collection of test examples
    :type testInstances: ClassificationDataSet
    :param L: number of items in each training set
    :type L: int
    :returns: accuracy of predictions
    :rtype: float
    """
    ensemble = []
    for j in range(self.m):
        # generate random sample of indices
        tset = random.sample(range(0, len(trainInstances["input"])), L)
        c = ClassificationDataSet(self.fnn.indim, 1, nb_classes=self.fnn.outdim)
        for index in tset:
            c.appendLinked(trainInstances['input'][index], trainInstances['target'][index])
        c._convertToOneOfMany(bounds=[0, 1])  # 1 of k binary representation
        # define neural net
        net = buildNetwork(24, 18, 16, 8, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
        trainer = BackpropTrainer(net, dataset=c, learningrate=0.01, momentum=0.1,
                                  verbose=True, weightdecay=0.01)
        trainer.trainEpochs(20)  # train
        ensemble.append(net)
        print percentError(trainer.testOnClassData(dataset=testInstances), testInstances['class'])

    # key is test example, value is list of labels from each model
    d = dict.fromkeys(np.arange(len(testInstances['input'])))
    for model in ensemble:
        # get label with highest probability for each test example
        result = model.activateOnDataset(testInstances).argmax(axis=1)
        for k in range(len(result)):
            if d[k] == None:
                d[k] = [result[k]]
            else:
                d[k].append(result[k])
    predictions = []
    for ex in d.keys():
        predictions.append(max(set(d[ex]), key=d[ex].count))  # majority voting
    actual = [int(row[0]) for row in testInstances['class']]
    return accuracy_score(actual, predictions)  # traditional accuracy calc
def cross_validate(comps, view=False):
    for layer_size in [5, 10, 25]:
        for alpha in [0.01, 0.03, 0.1]:
            fold_accuracy = []
            for i in xrange(0, 5):
                if not view:
                    xs = np.load('pca_fold_' + str(i) + '_train_xs.npy')[:, 0:comps]
                    ys = np.load('pca_fold_' + str(i) + '_train_ys.npy')
                    DS = ClassificationDataSet(comps, nb_classes=10)
                    for j in xrange(0, xs.shape[0]):
                        DS.appendLinked(xs[j, :], ys[j])
                    DS._convertToOneOfMany(bounds=[0, 1])
                net = buildNetwork(comps, layer_size, 10, outclass=SoftmaxLayer)
                #net = buildNetwork(comps, layer_size, layer_size, 10, outclass=SoftmaxLayer)
                if not view:
                    trainer = BackpropTrainer(net, DS, learningrate=alpha)
                    trainer.trainUntilConvergence(maxEpochs=4)
                test_xs = np.load('pca_fold_' + str(i) + '_test_xs.npy')[:, 0:comps]
                test_ys = np.load('pca_fold_' + str(i) + '_test_ys.npy')
                preds = np.zeros(test_ys.shape)
                correct = 0
                for j in xrange(0, test_xs.shape[0]):
                    if view:
                        break
                    pred_raw = net.activate(test_xs[j, :].tolist())
                    pred = np.argmax(np.array(pred_raw))
                    preds[j] = pred
                    if pred == test_ys[j]:
                        correct += 1
                if view:
                    preds = np.load('long_result_%d_%d_%f_%d.npy' % (comps, layer_size, alpha, i))
                    for j in xrange(0, preds.shape[0]):
                        if preds[j] == test_ys[j]:
                            correct += 1
                else:
                    np.save('long_result_%d_%d_%f_%d.npy' % (comps, layer_size, alpha, i), preds)
                accuracy = float(correct) / test_xs.shape[0]
                fold_accuracy.append(accuracy)
            acc = np.sum(fold_accuracy) / 5
            if view:
                #print "%d & %d & %f & %f\\\\" % (comps, layer_size, alpha, acc)
                #print "\hline"
                print acc, ",",
            else:
                print "Components: %d\tHidden Nodes: %d\tLearning Rate: %f Accuracy: %f" % (comps, layer_size, alpha, acc)
def getdata(self):
    dataset = ClassificationDataSet(9, 1)
    with open('xtraindata.csv') as tf:
        for line in tf:
            data = [x for x in line.strip().split(',') if x]
            # indata = tuple(data[1:10])
            # outdata = tuple(data[10:])
            """
            for i in range(4,10):
                data[i] = str(float(data[i])*100)
            if float(data[12]) > 0:
                data[12] = float(data[12]) * 100
            """
            for i in range(1, 4):
                data[i] = str(float(data[i]) / 100)
            dataset.appendLinked(data[1:10], data[12])
    return dataset
def teachNeuralNetwork(countState, testNumber):
    data = tuple(readData("DataMatrix" + str(1) + ".txt"))
    size = len(data)
    ds = ClassificationDataSet(size, 1, nb_classes=3, class_labels=['0', '1', '-1'])
    # SET TRAINING DATA
    for i in range(1, countState):
        try:
            data = []
            data = (readData("DataMatrix" + str(i) + ".txt"))
            answer = readAnswer("Answer" + str(i) + ".txt")
            idx = 2 if answer == -1 else answer
            ds.appendLinked(data, [idx])
        except BaseException:
            l = 1  # placeholder so the except clause has a body
    ds._convertToOneOfMany()
    net = buildNetwork(ds.indim, 300, ds.outdim, recurrent=True)
    # trainer = RPropMinusTrainer(net, dataset=ds, momentum=0.1, verbose=False, weightdecay=0.03)
    trainer = BackpropTrainer(net, dataset=ds, momentum=0.1, verbose=False, weightdecay=0.03)
    trainer.trainUntilConvergence(maxEpochs=2000)
    tstData = (readData("DataMatrixExpr" + str(testNumber) + ".txt"))
    ansArr = (net.activate(tstData))
    indx = 0
    max = ansArr[0]
    for i in range(len(ansArr)):
        if ansArr[i] > max:
            max = ansArr[i]
            indx = i
    t = [0, 1, -1]
    return t[indx]

# ========MAINCODE=============
# teachNeuralNetwork()
def Predict(self):
    prediction = []
    attributescount = len(self.testdata[0])
    nrclass = len(set(self.testlabel))
    dstraindata = ClassificationDataSet(attributescount, target=nrclass,
                                        nb_classes=nrclass,
                                        class_labels=list(set(self.testlabel)))
    for i in range(len(self.testdata)):
        dstraindata.appendLinked(self.testdata[i], self.testlabel[i])
    dstraindata._convertToOneOfMany()
    out = self.net.activateOnDataset(dstraindata)
    prediction = out.argmax(axis=1)
    '''
    for testrecord in self.testdata:
        out = self.net.activate(testrecord)[0]
        prediction.append(out)
    '''
    self.result = [self.testlabel, prediction]
def classificationDataSet(subject='a1', db=None):
    if not db:
        db = gyroWalkingData()
    raw = db.data[subject][:,2:]
    segs = db.segments[subject]
    DS = ClassificationDataSet(21, nb_classes=2)
    for i in range(0, len(raw), 5):
        hasSeg = 0
        for j in range(5):
            if i+j in segs:
                hasSeg = 1
        for j in range(5):
            if i+j < len(raw):
                DS.appendLinked(raw[i+j], [hasSeg])
    DS._convertToOneOfMany()
    return DS
def split_dataset(data_path, ratio):
    dataset = ClassificationDataSet(256, 10)
    with open("../data/semeion.data") as data:
        for record in data:
            line = record[:1812]
            line = line.replace(' ', ', ')
            data = line[:2046]
            dataList = data.split(',')
            dataList = map(float, dataList)
            ans = line[2048:-2]
            ansList = ans.split(',')
            ansList = map(int, ansList)
            dataset.appendLinked(dataList, ansList)
    train_data, test_data = dataset.splitWithProportion(ratio)
    return train_data, test_data
def init_brain(learn_data, epochs, hidden_count, TrainerClass=BackpropTrainer):
    global data_dir
    print("\t Epochs: ", epochs)
    if learn_data is None:
        return None
    print("Building network")
    net = buildNetwork(7 * 7, hidden_count, 4, hiddenclass=SigmoidLayer)
    # net = buildNetwork(64 * 64, 32 * 32, 8 * 8, 5)
    # net = buildNetwork(64 * 64, 5, hiddenclass=LinearLayer)
    # fill dataset with learn data
    trans = {'0': 0, '1': 1, '2': 2, '3': 3}
    ds = ClassificationDataSet(7 * 7, nb_classes=4, class_labels=['0', '1', '2', '3'])
    for inp, out in learn_data:
        ds.appendLinked(inp, [trans[out]])
    ds.calculateStatistics()
    print("\tNumber of classes in dataset = {0}".format(ds.nClasses))
    print("\tOutput in dataset is ", ds.getField('target').transpose())
    ds._convertToOneOfMany(bounds=[0, 1])
    print("\tBut after convert output in dataset is \n", ds.getField('target'))
    trainer = TrainerClass(net, learningrate=0.1, verbose=True)
    trainer.setData(ds)
    print("\tEverything is ready for learning.\nPlease wait, training in progress...")
    start = time.time()
    trainer.trainEpochs(epochs=epochs)
    end = time.time()
    f = open(data_dir + "/values.txt", "w")
    f.write("Training time: %.2f \n" % (end - start))
    f.write("Total epochs: %s \n" % (trainer.totalepochs))
    # f.write("Error: %.22f" % (trainer.trainingErrors[len(trainer.trainingErrors) - 1]))
    f.close()
    print("Percent of error: ", percentError(trainer.testOnClassData(), ds['class']))
    print("\tOk. We have trained our network.")
    NetworkWriter.writeToFile(net, data_dir + "/net.xml")
    return net
def consturt_train_data(self):
    # print len(self.output_train)
    # print len(self.eigenvector)
    ds = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for i in range(len(self.output_train)):
        ds.appendLinked(self.eigenvector[i], self.output_train[i])
    # print ds
    ds.calculateStatistics()

    # split training, testing, validation data set (proportion 4:1)
    tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
    tstdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    trndata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for n in range(0, trndata_temp.getLength()):
        trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])

    # one hot encoding
    # print trndata
    testdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    test_data_temp = self.test_data
    for n in range(len(test_data_temp)):
        testdata.addSample(test_data_temp[n], [0])
    # print testdata
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    testdata._convertToOneOfMany()
    return trndata, tstdata, testdata, ds
def classificationTrainingSet(holdouts=['a1'], db=None):
    if not db:
        db = gyroWalkingData()
    DS = ClassificationDataSet(21, nb_classes=2)
    for subject in db.data:
        if subject not in holdouts:
            raw = db.data[subject][:,2:]
            segs = db.segments[subject]
            seg_width = 2
            for i in range(0, len(raw), seg_width):
                hasSeg = 0
                for j in range(seg_width):
                    if i+j in segs:
                        hasSeg = 1
                for j in range(seg_width):
                    if i+j < len(raw):
                        DS.appendLinked(raw[i+j], [hasSeg])
    DS._convertToOneOfMany()
    return DS
'''
dataset = pd.get_dummies(df)
pd.set_option('display.max_columns', 1000)  # show all columns
X = dataset[dataset.columns[:-2]]
Y = dataset[dataset.columns[-2:]]
labels = dataset.columns._data[-2:]

# Step 3: convert the data into a SupervisedDataSet/ClassificationDataSet object
from pybrain.datasets import ClassificationDataSet

ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1:
        y = 1
    ds.appendLinked(X.ix[i], y)
ds.calculateStatistics()  # returns a class histogram (not entirely sure what it is used for)

# Step 4: split the test set and training set
testdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
testdata_temp, traindata_temp = ds.splitWithProportion(0.25)
for n in range(testdata_temp.getLength()):
    testdata.appendLinked(testdata_temp.getSample(n)[0], testdata_temp.getSample(n)[1])
print(testdata)
testdata._convertToOneOfMany()
print(testdata)
traindata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for n in range(traindata_temp.getLength()):
    traindata.appendLinked(
ds = ClassificationDataSet(numInput, nb_classes=numTarget)  # 2D input and 1D output

# Loading code based off of this code:
# http://stackoverflow.com/questions/8139822/how-to-load-training-data-in-pybrain
import csv
tf = open(trainingDataFile, 'r')

for line in tf.readlines():
    # Split the values on the current line, and convert to float
    tfData = [float(x) for x in line.strip().split(',') if x != '']
    inData = tuple(tfData[:numInput])    # Grab first numInput values
    outData = tuple(tfData[numInput:])   # Grab the rest

    # Add the data to the datasets
    ds.appendLinked(inData, outData)

# This converts each output to the desired activations of each neuron in the output layer
# Ex. class 1 target -> 10000000, class 2 target -> 01000000, class 3 target -> 00100000 etc.
ds._convertToOneOfMany(bounds=[0, 1])

# Some info printing code from here: http://pybrain.org/docs/tutorial/fnn.html
print("Number of training patterns: ", len(ds))
print("Input and output dimensions: ", ds.indim, ds.outdim)
print("First sample (input, target, class):")
print(ds['input'][0], ds['target'][0], ds['class'][0])
#input()

# Trainers
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
# supervised learning tutorial
from pybrain.datasets import SupervisedDataSet
from pybrain.datasets import ClassificationDataSet

# DS = SupervisedDataSet(3,2)
# DS.appendLinked([1,2,3], [4,5])
# print(len(DS))
# DS['input']
# array([[1., 2., 3.]])

DS = ClassificationDataSet(2, class_labels=['Urd', 'Verdandi', 'skuld'])
DS.appendLinked([0.1, 0.5], [0])
DS.appendLinked([1.2, 1.2], [1])
DS.appendLinked([1.4, 1.6], [1])
DS.appendLinked([1.6, 1.8], [1])
DS.appendLinked([0.10, 0.80], [2])
DS.appendLinked([0.20, 0.90], [2])

print(DS.calculateStatistics())
print(DS.classHist)
print(DS.nClasses)
print(DS.getClass(1))
print(DS.getField('target').transpose())
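A short continuation sketch, not part of the original tutorial snippet: one-hot encoding the targets of the small dataset built above and converting them back, the same round trip the ClassificationDataSet demo further down this page performs.

# Illustrative continuation of the tutorial dataset above (added for this page)
DS._convertToOneOfMany(bounds=[0, 1])   # target becomes one column per class
print(DS.getField('target'))
DS._convertToClassNb()                  # back to a single class-index column
print(DS.getField('target').transpose())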
class NeuralNetworkClassification(algorithmbase):

    def ExtraParams(self, hiddenlayerscount, hiddenlayernodescount):
        self.hiddenlayerscount = hiddenlayerscount
        self.hiddenlayernodescount = hiddenlayernodescount
        return self

    def PreProcessTrainData(self):
        self.traindata = preprocess_apply(self.traindata, self.missingvaluemethod,
                                          self.preprocessingmethods)

    def PrepareModel(self, savedmodel=None):
        if savedmodel != None:
            self.trainer = savedmodel
        else:
            attributescount = len(self.traindata[0])
            nrclass = len(set(self.trainlabel))
            self.ds = ClassificationDataSet(attributescount, target=nrclass,
                                            nb_classes=nrclass,
                                            class_labels=list(set(self.trainlabel)))
            for i in range(len(self.traindata)):
                self.ds.appendLinked(self.traindata[i], [self.trainlabel[i]])
            self.ds._convertToOneOfMany()

            self.net = FeedForwardNetwork()
            inLayer = LinearLayer(len(self.traindata[0]))
            self.net.addInputModule(inLayer)
            hiddenLayers = []
            for i in range(self.hiddenlayerscount):
                hiddenLayer = SigmoidLayer(self.hiddenlayernodescount)
                hiddenLayers.append(hiddenLayer)
                self.net.addModule(hiddenLayer)
            outLayer = SoftmaxLayer(nrclass)
            self.net.addOutputModule(outLayer)

            layers_connections = []
            layers_connections.append(FullConnection(inLayer, hiddenLayers[0]))
            for i in range(self.hiddenlayerscount - 1):
                layers_connections.append(FullConnection(hiddenLayers[i - 1], hiddenLayers[i]))
            layers_connections.append(FullConnection(hiddenLayers[-1], outLayer))
            for layers_connection in layers_connections:
                self.net.addConnection(layers_connection)
            self.net.sortModules()

            # training the network
            self.trainer = BackpropTrainer(self.net, self.ds)
            self.trainer.train()

    def PreProcessTestDate(self):
        self.testdata = preprocess_apply(self.testdata, self.missingvaluemethod,
                                         self.preprocessingmethods)

    def Predict(self):
        prediction = []
        attributescount = len(self.testdata[0])
        nrclass = len(set(self.testlabel))
        dstraindata = ClassificationDataSet(attributescount, target=nrclass,
                                            nb_classes=nrclass,
                                            class_labels=list(set(self.testlabel)))
        for i in range(len(self.testdata)):
            dstraindata.appendLinked(self.testdata[i], self.testlabel[i])
        dstraindata._convertToOneOfMany()
        out = self.net.activateOnDataset(dstraindata)
        prediction = out.argmax(axis=1)
        '''
        for testrecord in self.testdata:
            out = self.net.activate(testrecord)[0]
            prediction.append(out)
        '''
        self.result = [self.testlabel, prediction]

    def GetModel(self):
        return self.trainer
print("Th complete dataset shape is : ", Data.shape) print("Th complete target shape is : ", Target.shape) print("The training data shape is (2/3 of complete dataset): ", DataTrain.shape) print("The training target shape is (2/3 of complete target): ", TargetTrain.shape) print("The test data shape is (1/3 of complete dataset): ", DataTest.shape) print("The test target shape is (1/3 of complete target): ", TargetTest.shape) print("\n") #prepare data for pybrain number_of_columns = Data.shape[1] PyBData = ClassificationDataSet(number_of_columns, 1, nb_classes=2) PyBDataTrain = ClassificationDataSet(number_of_columns, 1, nb_classes=2) PyBDataTest = ClassificationDataSet(number_of_columns, 1, nb_classes=2) for i in xrange(len(Data)): PyBData.appendLinked(Data[i], Target[i]) for i in xrange(len(DataTrain)): PyBDataTrain.appendLinked(DataTrain[i], TargetTrain[i]) for i in xrange(len(DataTest)): PyBDataTest.appendLinked(DataTest[i], TargetTest[i]) #*******************End of Preparing Data & Target for Estimators****************** #*******************Decision Tree Classification****************** print("Entering Decision Tree Classifier with starting time", time.localtime()) clf_dt = tree.DecisionTreeClassifier(criterion="entropy") clf_dt = clf_dt.fit(DataTrain, TargetTrain)
]  # list of black and white pixels

# Normalize the pixels to average brightness
avgluminosity = sum(pixels) / len(pixels)
processedpixels = map(
    lambda p: min(p + 255 / 2 - avgluminosity, 255)
    if (avgluminosity < 255 / 2) else max(p + 255 / 2 - avgluminosity, 0),
    pixels)

# Save lowres images
a = np.array(processedpixels)
a = a.reshape(-1, width)
im = toimage(a)
im.save(os.path.join(murkafolder + '/bw', trainingpicture))

# Populate database
ds.appendLinked(processedpixels, [0])  # 0 = Murka; 1 = Masya

for trainingpicture in [f for f in os.listdir(masyafolder) if f.endswith('.png')]:
    im = Image.open(os.path.join(masyafolder, trainingpicture))
    imlow = im.resize((width, height), Image.ANTIALIAS)
    # Convert black and white (L = luminosity; 0 = black; 255 = white)
    bw_im = imlow.convert('L')
    pixels = [
        bw_im.getpixel((i, j)) for j in range(height) for i in range(width)
    ]  # list of black and white pixels
    # Normalize the pixels to average brightness
    avgluminosity = sum(pixels) / len(pixels)
    processedpixels = map(
        lambda p: min(p + 255 / 2 - avgluminosity, 255)
for i in range(len(data)):
    if data[i][4] == 'setosa':
        data[i][4] = 0
    elif data[i][4] == 'versicolor':
        data[i][4] = 1
    else:
        data[i][4] = 2

net = buildNetwork(4, 5, 3)
ds = ClassificationDataSet(4, nb_classes=3,
                           class_labels=['setosa', 'versicolor', 'verginica'])
for i in data:
    ds.appendLinked(i[:4], list(i[4]))
ds._convertToOneOfMany(bounds=[0, 1])

trainer = BackpropTrainer(net, verbose=True)
trainer.setData(ds)
trainer.trainUntilConvergence(maxEpochs=100)

names_dict = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'verginica',
    0: 'setosa',
    1: 'versicolor',
    2: 'verginica'
def getDataFromFolder(folderpath, datapath):
    '''
    Creates pybrain ClassificationDataSet from folder of iPhone images and .txt file of data
    '''
    # Setup Dataset for PyBrain
    data = ClassificationDataSet(400, nb_classes=9,
                                 class_labels=['1','2','3','4','5','6','7','8','9'])
    # Get photos
    dirs = os.listdir(folderpath)
    # Get data
    dat = []
    with open(datapath) as f:
        dat = f.read().splitlines()
    # Set variables
    parsed = 0
    missed = 0
    missednumbers = 0    # count of numbers in image that were not parsed
    falsenumbers = 0     # count of false number parsings from empty squares
    gatherednumbers = 0
    correctspaces = 0

    # Process each photo/data pairing
    for p in range(1, len(dirs) - 1):
        # Create image
        img = cv2.imread((folderpath + dirs[p]))
        print(folderpath + dirs[p])
        # Get all the digits in the image
        pil_im, numbers, parsedcheck, missedcheck = process(img, False)
        # Board located successfully
        parsed += parsedcheck
        # Board not located
        missed += missedcheck
        ind = 0
        # Match digits photos with data
        for number in numbers:
            if (number is None):
                # True negative
                if dat[p-1][ind] == '0':
                    correctspaces += 1
                    ind += 1
                # False negative
                else:
                    missednumbers += 1
                    ind += 1
            else:
                # False positive
                if dat[p-1][ind] == '0':
                    falsenumbers += 1
                    ind += 1
                # True positive
                else:
                    gatherednumbers += 1
                    data.appendLinked(number.ravel(), [int(dat[p-1][ind])-1])
                    ind += 1

    # Print results
    print("\nData processed: ")
    print("\n Puzzles located successfully: " + str(parsed))
    print(" Puzzles not located successfully: " + str(missed) + "\n")
    print(" Number of digit samples gathered (true positives): " + str(gatherednumbers))
    print(" Number of spaces confirmed (true negatives): " + str(correctspaces) + "\n")
    print(" Number of digit samples missed in a processed image (false negatives): " + str(missednumbers))
    print(" Number of digit samples that needed to be removed (false positives): " + str(falsenumbers))
    return data
def classDsBuild(data):
    DS = ClassificationDataSet(5, nb_classes=4)
    for ele in data:
        DS.appendLinked((ele[0], ele[1], ele[2], ele[3], ele[4]), (ele[5]))
    dsTrain, dsTest = DS.splitWithProportion(0.8)
    return dsTrain, dsTest
#########################################################################################
#########################################################################################
#########################################################################################

# create a dataset for use in pybrain
from pybrain.datasets import ClassificationDataSet
alldata = ClassificationDataSet(3, nb_classes=2, class_labels=['default_Yes', 'default_No'])

# classes are encoded into one output unit per class, that takes on a certain value if the class is present
#alldata._convertToOneOfMany(bounds=[0, 1])

# convert back to a single column of class labels
#alldata._convertToClassNb()

# Target dimension is supposed to be 1
# The targets are class labels starting from zero
for i in range(N):
    alldata.appendLinked(Xdf.ix[i,:], Ydf['default_Yes'].ix[i,:])

# generate training and testing data sets
tstdata, trndata = alldata.splitWithProportion(0.10)

# classes are encoded into one output unit per class, that takes on a certain value if the class is present
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
len(tstdata), len(trndata)

# calculate statistics and generate histograms
alldata.calculateStatistics()
print alldata.classHist
print alldata.nClasses
print alldata.getClass(1)

#########################################################################################
#########################################################################################
#########################################################################################
camada2 = int(sys.argv[6])

k = 0
size = 70
for line in inputFile.readlines():
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata, outdata)
    k += 1
    if (k == size):
        testdata, traindata = ds.splitWithProportion(PorcDivTest)
        ds.clear()
        k = 0
        for inp, targ in testdata:
            testSet.appendLinked(inp, targ - 1)
        for inp, targ in traindata:
            trainSet.appendLinked(inp, targ - 1)

trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])

if (camada2 == 0):
    net = buildNetwork(trainSet.indim, camada1, trainSet.outdim, recurrent=True)
else:
    net = buildNetwork(trainSet.indim, camada1, camada2, trainSet.outdim, recurrent=True)

trainer = BackpropTrainer(net, dataset=trainSet, learningrate=Learning,
                          momentum=Momentum, verbose=True)
trainer.trainOnDataset(trainSet, Ciclos)
out = net.activateOnDataset(testSet)
out = out.argmax(axis=1)
sns.plt.show()
'''

# one-hot encoding
wm_df = pd.get_dummies(df)
X = wm_df[wm_df.columns[1:-2]]       # input
Y = wm_df[wm_df.columns[-2:]]        # output
label = wm_df.columns._data[-2:]     # class label

# construction of data in pybrain's formation
from pybrain.datasets import ClassificationDataSet
ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1:
        y = 1
    ds.appendLinked(X.values[i], y)
ds.calculateStatistics()

# generation of train set and test set (3:1)
tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
tstdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
trndata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
print "number of inputs m: ", num_input # initialize two classification data sets, one for training # and cross-validation purposes, the other for the test data # default parameter 'target' in method ClassificationDataSet # is '1' DS = ClassificationDataSet(len(features[1]), nb_classes=10) test_DS = ClassificationDataSet(len(features[1]), nb_classes=10) i = 0 # as written, the follwing 3 lines feed only the first # 10000 training cases into the NN for training, for speed # and demonstration purposes. For real training, use # while i < num_input: while i < 1000: DS.appendLinked(features[i], targets[i]) i += 1 i = 0 # as written, the following 3 lines predict only the first # 50 test cases, for the sake of speed and demonstration while i < 50: test_DS.appendLinked(test_features[i], 0) i += 1 # split up the classification data set 'DS' into training # and cross-validation sets cvdata, trndata = DS.splitWithProportion(0.2) # the _convertToOneOfMany method DS._convertToOneOfMany(bounds=[0, 1])
data_set = load_breast_cancer()
X = data_set.data                        # feature
feature_names = data_set.feature_names
y = data_set.target                      # label
target_names = data_set.target_names

# data normalization
from sklearn import preprocessing
normalized_X = preprocessing.normalize(X)

# construction of data in pybrain's formation
from pybrain.datasets import ClassificationDataSet
ds = ClassificationDataSet(30, 1, nb_classes=2, class_labels=y)
for i in range(len(y)):
    ds.appendLinked(X[i], y[i])
ds.calculateStatistics()

# split of training and testing dataset
tstdata_temp, trndata_temp = ds.splitWithProportion(0.5)
tstdata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
trndata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
import cv2
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.structure import SigmoidLayer

DS = ClassificationDataSet(896, class_labels=['notFace', 'Face'])

posDir = 'pos/'
posFilenames = [f for f in listdir(posDir)]
for f in posFilenames:
    img = (cv2.imread(posDir + f, 0)).ravel()
    img = img / 127.5 - 1
    DS.appendLinked(img, [1])

negDir = 'neg/'
negFilenames = [f for f in listdir(negDir)]
for f in negFilenames:
    img = cv2.imread(negDir + f, 0).ravel()
    img = img / 127.5 - 1
    DS.appendLinked(img, [0])
# Dataset setup here

Momen = 0.0
WeiDecay = 0.003

print 'training...'
net = buildNetwork(896, 100, 10, 1, bias=True, outclass=SigmoidLayer)
trainer = BackpropTrainer(net, DS, momentum=Momen, weightdecay=WeiDecay)
proportion2Cost = trainer.trainUntilConvergence(validationProportion=0.20,
                                                maxEpochs=1000,
# open image
currentArray = numpy.array([])
fullPath = negativeImageDirectory + '/' + fi
currentImage = Image.open(fullPath)
imagePixels = currentImage.load()
imageSize = currentImage.size

# read pixel values
for i in range(imageSize[0]):
    for j in range(imageSize[1]):
        pixelArray = [
            imagePixels[i, j][0], imagePixels[i, j][1], imagePixels[i, j][2]
        ]
        currentArray = numpy.append(currentArray, pixelArray)

# append to dataset
dataSet.appendLinked(currentArray, 0)

# do the same but for the positive images
positiveImageFiles = os.listdir(positiveImageDirectory)
for fi in positiveImageFiles:
    currentArray = numpy.array([])
    fullPath = positiveImageDirectory + '/' + fi
    currentImage = Image.open(fullPath)
    imagePixels = currentImage.load()
    imageSize = currentImage.size
    for i in range(imageSize[0]):
        for j in range(imageSize[1]):
            pixelArray = [
                imagePixels[i, j][0], imagePixels[i, j][1], imagePixels[i, j][2]
def model_net(self, fields, datas=None):
    # Normalize the data to be processed first, so large values do not swamp small ones.
    # pandas DataFrame basics: https://www.jianshu.com/p/682c24aef525
    # normalization: https://www.zhihu.com/question/57509028
    # standardization vs. normalization: https://www.zhihu.com/question/20467170
    # difference between sklearn's fit_transform() and transform():
    # http://blog.csdn.net/quiet_girl/article/details/72517053
    # (the exact implementation still needs a closer look)
    from sklearn.preprocessing import MinMaxScaler
    from pybrain.structure import SoftmaxLayer
    from pybrain.datasets import ClassificationDataSet
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    from pybrain.utilities import percentError
    from pybrain.structure import TanhLayer

    scaler = MinMaxScaler()
    datas[fields] = scaler.fit_transform(datas[fields])
    tran_data = datas[fields].values
    tran_target = datas['Flag'].values
    tran_label = ['Sell', 'Hold', 'Buy']

    class_datas = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    print(type(tran_target))
    print(tran_target)
    for i in range(len(tran_data)):
        class_datas.appendLinked(tran_data[i], tran_target[i])

    tstdata_temp, trndata_temp = class_datas.splitWithProportion(0.25)
    print(len(tstdata_temp), len(trndata_temp))
    tstdata = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    trndata = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    for n in range(0, trndata_temp.getLength()):
        trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    for n in range(0, tstdata_temp.getLength()):
        tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()

    tnet = buildNetwork(trndata.indim, 5, trndata.outdim,
                        hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(tnet, dataset=trndata, batchlearning=True,
                              momentum=0.1, verbose=True, weightdecay=0.01)
    for i in range(5000):
        trainer.trainEpochs(20)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        testResult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % testResult)
    return trainer, class_datas
        return leftDs, rightDs

    def castToRegression(self, values):
        """Converts data set into a SupervisedDataSet for regression. Classes
        are used as indices into the value array given."""
        regDs = SupervisedDataSet(self.indim, 1)
        fields = self.getFieldNames()
        fields.remove('target')
        for f in fields:
            regDs.setField(f, self[f])
        regDs.setField('target', values[self['class'].astype(int)])
        return regDs


if __name__ == "__main__":
    dataset = ClassificationDataSet(2, 1, class_labels=['Urd', 'Verdandi', 'Skuld'])
    dataset.appendLinked([0.1, 0.5], [0])
    dataset.appendLinked([1.2, 1.2], [1])
    dataset.appendLinked([1.4, 1.6], [1])
    dataset.appendLinked([1.6, 1.8], [1])
    dataset.appendLinked([0.10, 0.80], [2])
    dataset.appendLinked([0.20, 0.90], [2])
    dataset.calculateStatistics()
    print(("class histogram:", dataset.classHist))
    print(("# of classes:", dataset.nClasses))
    print(("class 1 is: ", dataset.getClass(1)))
    print(("targets: ", dataset.getField('target')))
    dataset._convertToOneOfMany(bounds=[0, 1])
    print("converted targets: ")
    print((dataset.getField('target')))
    dataset._convertToClassNb()
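An illustrative sketch, not from the original file: calling castToRegression on the demo dataset built in the __main__ block above. The value array is arbitrary; each class index is mapped to the corresponding numeric regression target.

# Hypothetical usage of castToRegression (value array is an assumption)
import numpy as np
values = np.array([-1.0, 0.0, 1.0])        # regression target for classes 0, 1, 2
regDs = dataset.castToRegression(values)
print(regDs.getField('target'))            # class indices replaced by values[class]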
""" return ret vecSize = 100 subjects = [2, 5, 6, 7, 8, 12, 16, 35 ,39] ds = None for s in subjects: for cycleNum in range(1, 13): fileName = '../inputs/Vicon from CMU/subjects/'+str(s)+'/'+str(cycleNum)+'.amc' try: data = getData(fileName, vecSize) except IOError: continue if ds is None:#initialization ds = ClassificationDataSet( len(data), 1 ) ds.appendLinked(data , subjects.index(s)) ds.nClasses = len(subjects) decay= 0.99995 myWeightdecay = 0.8 initialLearningrate= 0.005 hidden_size = 1000 epochs=1000 splitProportion = 0.5 print 'dataset size', len(ds) print 'input layer size', len(ds.getSample(0)[0]) tstdata, trndata = ds.splitWithProportion( splitProportion ) trndata._convertToOneOfMany( ) tstdata._convertToOneOfMany( )
# Calculate and print number of total input nodes (unique taxa) and total output nodes (unique categories to classify)
collection.setUniqueTaxa()
collection.setUniqueCategories()
print 'Unique Taxa (#input nodes): ' + str(len(collection.getUniqueTaxa()))
print 'Unique Categories (#output nodes): ' + str(len(collection.getUniqueCategories()))

# Create training sets and test sets
trainingset = collection.createAnnTrainingsets()
testset = collection.createAnnTestsets()

# Map training sets and test sets to PyBrain
DS = ClassificationDataSet(trainingset['input_dimension'], trainingset['output_dimension'])
for i in range(0, len(trainingset['input_arrays'])):
    DS.appendLinked(trainingset['input_arrays'][i], trainingset['output_arrays'][i])

DStest = ClassificationDataSet(trainingset['input_dimension'], trainingset['output_dimension'])
for i in range(0, len(testset['input_arrays'])):
    DStest.appendLinked(testset['input_arrays'][i], testset['output_arrays'][i])

# Create network
fnn = buildNetwork(DS.indim, 50, DS.outdim, outclass=SoftmaxLayer, fast=False)
#fnn = buildNetwork( DS.indim, 5, DS.outdim, outclass=SoftmaxLayer )

# Create trainer
trainer = BackpropTrainer(fnn, dataset=DS, momentum=0.01, verbose=True, weightdecay=0.0001)
numPatTest, numColsTest = patternTest.shape

# Generate the inputs
patternTrainInput = patternTrain[:, 1:numColsTrain]
patternValidInput = patternValid[:, 1:numColsValid]
patternTestInput = patternTest[:, 1:numColsTest]

# Generate the desired outputs
patternTrainTarget = np.zeros([numPatTrain, 2])
patternValidTarget = np.zeros([numPatValid, 2])
patternTestTarget = np.zeros([numPatTest, 2])

# Create the supervised datasets
trainDS = ClassificationDataSet(numColsTrain - 1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatTrain):
    trainDS.appendLinked(patternTrainInput[i], patternTrain[i, 0])

validDS = ClassificationDataSet(numColsTrain - 1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatValid):
    validDS.appendLinked(patternValidInput[i], patternValid[i, 0])

testDS = ClassificationDataSet(numColsTrain - 1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatTest):
    testDS.appendLinked(patternTestInput[i], patternTest[i, 0])

# Create the SVM and the trainer
svm = SVMUnit()
trainer = SVMTrainer(svm, trainDS)

# SVM parameters
myLog2C = 0.
def getDataFromSudokuDataset():
    '''
    Creates pybrain ClassificationDataSet from folder of images from Sudoku dataset
    found at https://github.com/wichtounet/sudoku_dataset
    '''
    data = ClassificationDataSet(400, nb_classes=9,
                                 class_labels=['1','2','3','4','5','6','7','8','9'])
    path = '/Users/kdelaney/Downloads/sudoku_dataset-master/images/'
    dirs = os.listdir(path)
    parsed = 0
    missed = 0
    missednumbers = 0    # count of numbers in image that were not parsed
    falsenumbers = 0     # count of false number parsings from empty squares
    gatherednumbers = 0
    correctspaces = 0

    for p in range(0, len(dirs), 2):
        img = cv2.imread((path + dirs[p+1]))
        print((path + dirs[p+1]))
        dat = []
        with open((path + dirs[p])) as f:
            next(f)
            next(f)
            for line in f:
                dat += line.split()
        pil_im, numbers, parsedcheck, missedcheck = process(img, False)
        parsed += parsedcheck
        missed += missedcheck
        ind = 0
        if numbers is not None:
            for number in numbers:
                if number is None:
                    if dat[ind] == '0':
                        correctspaces += 1
                        ind += 1
                    else:
                        missednumbers += 1
                        ind += 1
                else:
                    if dat[ind] == '0':
                        falsenumbers += 1
                        ind += 1
                    else:
                        gatherednumbers += 1
                        data.appendLinked(number.ravel(), [int(dat[ind])-1])
                        ind += 1

    print("\nprocessed: ")
    print("\n Test images processed successfully: " + str(parsed))
    print(" Test images not processed successfully: " + str(missed) + "\n")
    print(" Number of digit samples gathered (true positives): " + str(gatherednumbers))
    print(" Number of spaces confirmed (true negatives): " + str(correctspaces) + "\n")
    print(" Number of digit samples missed in a processed image (false negatives): " + str(missednumbers))
    print(" Number of digit samples removed (false positives): " + str(falsenumbers))
    return data
    tstresults.append([])
    #hits[m].append(0)
    excpectedLens.append(0)
    #for mood in couple:
    for typeNum in range(1, 21):
        for take in range(1, 10):
            fileName = '../inputs/Rachelle/v2/recordingsByMood/' + mood + '/' + \
                str(typeNum) + '_' + str(take) + '.skl'
            try:
                data = ge.getFeatureVec(fileName)
            except IOError:
                continue
            if ds is None:  # initialization
                ds = ClassificationDataSet(len(data), 1)
            excpectedLens[m] += 1
            ds.appendLinked(data, moods.index(mood))

splitProportion = 0.2
decay = 0.99993
myWeightdecay = 0.5
initialLearningrate = 0.01
hidden_size = 200
epochs = 1000
momentum = 0.15

ds.nClasses = len(moods)
tstdata, trndata = ds.splitWithProportion(splitProportion)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

inLayer = LinearLayer(len(trndata.getSample(0)[0]))
hiddenLayer = SigmoidLayer(hidden_size)
outLayer = LinearLayer(len(trndata.getSample(0)[1]))
n = FeedForwardNetwork()