def main():
    """Train a fresh all-sigmoid 2-3-1 network on XOR 100 times and count
    how often the (1, 1) -> 0 case is learned (activation below 0.5).

    NOTE(review): this file defines main() twice (a near-identical copy
    follows); the later definition shadows this one at import time.
    """
    a = 0  # number of runs whose (1, 1) activation fell below 0.5
    for i in range(0, 100):
        # Rebuild the network every iteration so each run starts from
        # fresh random weights.
        inLayer = SigmoidLayer(2)
        hiddenLayer = SigmoidLayer(3)
        outLayer = SigmoidLayer(1)
        net = FeedForwardNetwork()
        net.addInputModule(inLayer)
        net.addModule(hiddenLayer)
        net.addOutputModule(outLayer)
        in_to_hidden = FullConnection(inLayer, hiddenLayer)
        hidden_to_out = FullConnection(hiddenLayer, outLayer)
        net.addConnection(in_to_hidden)
        net.addConnection(hidden_to_out)
        net.sortModules()  # finalize topology before training
        # XOR truth table.  NOTE(review): (0) and (1) are plain ints, not
        # 1-tuples; PyBrain accepts scalars, but (0,) would be clearer.
        ds = SupervisedDataSet(2, 1)
        ds.addSample((1, 1), (0))
        ds.addSample((1, 0), (1))
        ds.addSample((0, 1), (1))
        ds.addSample((0, 0), (0))
        trainer = BackpropTrainer(net, ds)
        trainer.trainUntilConvergence()
        # Only the (1, 1) case is checked as the success criterion.
        out = net.activate((1, 1))
        if (out < 0.5):
            a = a + 1
    # NOTE(review): reconstructed from a collapsed line — the print is
    # placed after the loop (final tally), which matches the "/100" text.
    print(str(a) + "/100")
def main():
    """Repeatedly (100x) train a sigmoid XOR network and report how many
    runs drive the (1, 1) output under the 0.5 threshold.

    NOTE(review): duplicate of the main() defined above; being defined
    second, this copy is the one that survives at module level.
    """
    a = 0  # success counter across the 100 independent runs
    for i in range(0,100):
        # Fresh 2-3-1 sigmoid network per run (fresh random weights).
        inLayer = SigmoidLayer(2)
        hiddenLayer = SigmoidLayer(3)
        outLayer = SigmoidLayer(1)
        net = FeedForwardNetwork()
        net.addInputModule(inLayer)
        net.addModule(hiddenLayer)
        net.addOutputModule(outLayer)
        in_to_hidden = FullConnection(inLayer,hiddenLayer)
        hidden_to_out = FullConnection(hiddenLayer,outLayer)
        net.addConnection(in_to_hidden)
        net.addConnection(hidden_to_out)
        net.sortModules()  # must be called before training/activation
        # XOR samples; targets are scalars rather than 1-tuples.
        ds = SupervisedDataSet(2,1)
        ds.addSample((1,1), (0))
        ds.addSample((1,0), (1))
        ds.addSample((0,1), (1))
        ds.addSample((0,0), (0))
        trainer = BackpropTrainer(net,ds)
        trainer.trainUntilConvergence()
        out = net.activate((1,1))
        if (out < 0.5):
            a = a + 1
    # Final tally, e.g. "87/100".
    print(str(a) + "/100")
class PyBrainANNs:
    """Feed-forward regression network backed by PyBrain.

    Topology: linear input -> sigmoid hidden -> linear output, with full
    connections between consecutive layers.  Instances can be trained,
    scored, used for prediction, and persisted via joblib.
    """

    def __init__(self, x_dim, y_dim, hidden_size, s_id):
        """Build and finalize an x_dim -> hidden_size -> y_dim network.

        s_id is kept as ``serialize_id`` for external bookkeeping only.
        """
        self.serialize_id = s_id
        self.net = FeedForwardNetwork()
        layer_in = LinearLayer(x_dim)
        layer_mid = SigmoidLayer(hidden_size)
        layer_out = LinearLayer(y_dim)
        self.net.addInputModule(layer_in)
        self.net.addModule(layer_mid)
        self.net.addOutputModule(layer_out)
        # Fully connect each consecutive pair of layers.
        for src, dst in ((layer_in, layer_mid), (layer_mid, layer_out)):
            self.net.addConnection(FullConnection(src, dst))
        # sortModules() must run before the net can be activated.
        self.net.sortModules()

    def _prepare_dataset(self, x_data, y_data):
        """Wrap x/y data in a SupervisedDataSet matching the net's dims.

        A 1-D y is promoted to a column matrix; otherwise y_data is
        expected to expose ``.values`` (pandas-style container).
        """
        assert x_data.shape[0] == y_data.shape[0]
        y_matrix = np.matrix(y_data).T if len(y_data.shape) == 1 else y_data.values
        assert x_data.shape[1] == self.net.indim
        assert y_matrix.shape[1] == self.net.outdim
        dataset = SupervisedDataSet(self.net.indim, self.net.outdim)
        dataset.setField("input", x_data)
        dataset.setField("target", y_matrix)
        return dataset

    def train(self, x_data, y_data):
        """Run one backprop training pass over the supplied data."""
        dataset = self._prepare_dataset(x_data, y_data)
        BackpropTrainer(self.net, dataset).train()

    def score(self, x_data, y_datas):
        """Score the net on (x, y) via ModuleValidator + regression_score."""
        dataset = self._prepare_dataset(x_data, y_datas)
        return ModuleValidator.validate(regression_score, self.net, dataset)

    def predict(self, x_data):
        """Activate the net on each row of x_data; returns an ndarray."""
        activations = [self.net.activate(sample) for sample in x_data]
        return np.array(activations)

    def save(self, path):
        """Persist the trained network to ``path`` with joblib."""
        joblib.dump(self.net, path)

    def load(self, path):
        """Replace the current network with one loaded from ``path``."""
        self.net = joblib.load(path)
# Top-level training script fragment (reconstructed from a collapsed line;
# nesting levels are a best-effort reading of the statement order).
# NOTE(review): `net`, `trainer`, `phonemeErrors`, `stressErrors`,
# NUMINPUTS/NUMOUTPUTS/MINSTRESS, `dictionary`, `outputUnits`,
# `wordstream`, `convertToBinary`, `closestByDotProduct`, `articFeatures`
# and `stressFeatures` are all defined elsewhere in this module.
# 100 passes over the dictionary; each entry becomes its own dataset and
# a single trainer.train() step.  Error lists are never reset, so the
# printed accuracy is cumulative across all cycles so far.
for cycle in range(100):
    datafile = 'top1000.data';
    for entry in dictionary(datafile):
        #print("working on", entry);
        outmatrix = outputUnits(entry);  # target rows (phoneme+stress) per letter
        lpos = 0;  # letter position within the current entry
        ds = SupervisedDataSet(NUMINPUTS, NUMOUTPUTS);
        for letterContexts in wordstream(input_entries = (entry,)):
            #print("letterContexts", letterContexts);
            for inarray in convertToBinary(letterContexts):
                outarray = outmatrix[lpos];
                #print("inarray",inarray);
                #print("outarray",outarray);
                #print("inlen %d outlen %d" % (len(inarray), len(outarray)));
                ds.addSample(inarray, outarray);
                # Evaluate the current net on this sample (pre-update) so
                # accuracy reflects performance before this training step.
                observed = net.activate(inarray);
                phoneme = entry.phonemes[lpos];
                # First MINSTRESS outputs encode the phoneme features ...
                observedPhoneme = closestByDotProduct(observed[:MINSTRESS], articFeatures);
                phonemeErrors.append(bool(phoneme != observedPhoneme));
                stress = entry.stress[lpos];
                # ... the remainder encodes the stress features.
                observedStress = closestByDotProduct(observed[MINSTRESS:], stressFeatures);
                stressErrors.append(bool(stress != observedStress));
                lpos += 1
        # Train once on this entry's samples.
        trainer.setData(ds);
        #pdb.set_trace();
        err = trainer.train();
        #print(err, " ", entry);
    print("accuracy: phonemes %.3f stresses %.3f" % (1 - np.mean(phonemeErrors), 1 - np.mean(stressErrors)) );
    #accuracy is a vector with one element in {0,1} for each letter i
class BMTrainer:
    """Train a PyBrain feed-forward network from an Excel workbook.

    Reads training rows from ``srcname``, normalizes inputs/targets with
    MinMaxScaler, trains a 3-hidden-layer (sigmoid/tanh/sigmoid) network,
    and can generate simulated test rows, score them, persist the model,
    and plot the results.
    """

    # (original commented-out class-level defaults, kept for reference)
    # hidden-layer neuron count:
    # hiddendim = 3
    # # source Excel file with the training data:
    # srcname = 'trainer.xlsx'
    # # file the trained network is stored in:
    # destname = 'buildBMTrainer.xml'
    # number of result columns in the source (= output-layer nodes)
    # rescol = 1
    # whether to show intermediate iteration output
    # verbose = True
    # # overall tolerance
    # finalerror = 0
    # # restest = []
    # __fnn = None
    # __sy = None
    def __init__(self, _hiddendim=3, _srcnmae='trainer.xlsx', _destxls='trainerdest.xls', _destname='buildBMTrainer'):
        # NOTE(review): `_srcnmae` looks like a typo for `_srcname`; kept
        # because existing callers may pass it by keyword.
        self.hiddendim = _hiddendim  # size of the middle (tanh) hidden layer
        self.srcname = _srcnmae      # training-data workbook path
        self.destxls = _destxls      # workbook for generated/test data
        self.destname = _destname    # base name for saved model files
        self.restest = []            # de-normalized re-activations on the training set
        self.rescol = 1              # number of target columns (output-layer size)
        self.verbose = True          # print progress while reading/training
        # overall tolerance
        self.finalerror = 0
        # restest = []
        self.__fnn = None   # trained FeedForwardNetwork
        self.__sy = None    # snapshot of the y-scaler used when saving
        self.__sx = None    # snapshot of the x-scaler used when saving
        self.realy = None   # raw (unscaled) training targets
        self.weights = []   # per-input aggregated |weight| sums (getweight)
        self.srcx = []      # training inputs read from Excel
        self.srcy = []      # training targets read from Excel
        self.destx = []     # generated/test inputs
        self.desty = []     # predictions on the test inputs
        self.sx = None      # MinMaxScaler fitted on inputs
        self.sy = None      # MinMaxScaler fitted on targets
        self.myalg = True   # True -> use BMBackpropTrainer.bmtrain()
        self.npin = 0       # percentage of predictions inside [min, max] band

    # read the Excel training sheet
    def readexcel(self):
        """Load inputs/targets from sheet 0 of ``self.srcname``.

        Row 0 is treated as a header; the last ``rescol`` columns of each
        row are the targets.  Stores arrays on self and returns copies of
        (x, y).
        """
        workbook = xlrd.open_workbook(self.srcname)
        sheet1 = workbook.sheet_by_index(0)
        if (self.verbose):
            print('训练集共:' + str(sheet1.nrows) + '行,' + str(sheet1.ncols) + '列;其中结果为:' + str(self.rescol) + '列')
        self.srcx = []
        self.srcy = []
        if (sheet1.nrows > 1 and sheet1.ncols > self.rescol):
            # NOTE(review): np.float was removed in NumPy 1.24; this code
            # requires an older NumPy (or should use float/np.float64).
            self.srcx = np.zeros(
                (sheet1.nrows - 1, sheet1.ncols - self.rescol), dtype=np.float)
            self.srcy = np.zeros((sheet1.nrows - 1, self.rescol), dtype=np.float)
            for i in range(sheet1.nrows - 1):
                for j in range(sheet1.ncols):
                    if (j < sheet1.ncols - self.rescol):
                        self.srcx[i][j] = sheet1.cell(i + 1, j).value
                    else:
                        self.srcy[i][j - sheet1.ncols + self.rescol] = sheet1.cell(i + 1, j).value
        return self.srcx.copy(), self.srcy.copy()

    def writeexcel(self, x=None, size=0, savexls=''):
        """Generate ``size`` random rows spanning each column's observed
        [min, max] range of ``x`` and write them to ``savexls``
        (defaults: x = stored training inputs, savexls = self.destxls).
        """
        # NOTE(review): `x == None` raises on an ndarray argument
        # (elementwise comparison); `x is None` is what is intended.
        if x == None:
            x = np.array(self.srcx).copy()
        if savexls == '':
            savexls = self.destxls
        if size > 0:
            workbook = xlwt.Workbook()
            worksheet = workbook.add_sheet('dest')
            self.destx = np.zeros((size, len(x[0])), dtype=np.float)
            # number of simulated data rows:
            for i in range(size):
                for j in range(len(x[0])):
                    # uniform sample within the observed range of column j
                    cellval = round(random.uniform(min(x[:, j]), max(x[:, j])), 3)
                    self.destx[i][j] = cellval
                    worksheet.write(i, j, cellval)
            workbook.save(savexls)

    def testdest(self):
        """Predict a target for every row of ``self.destxls``, append the
        prediction as an extra column in the workbook, and plot/report how
        predictions fall relative to the training-target min/max band.
        """
        # fetch the test data:
        workbook = xlrd.open_workbook(self.destxls)
        sheet1 = workbook.sheet_by_index(0)
        # NOTE(review): `xlucopy` is presumably xlutils.copy.copy — confirm
        # against this module's imports.
        workbookw1 = xlucopy(workbook)
        sheetw1 = workbookw1.get_sheet(0)
        self.destx = np.zeros((sheet1.nrows, sheet1.ncols), dtype=np.float)
        for i in range(sheet1.nrows):
            for j in range(sheet1.ncols):
                self.destx[i][j] = sheet1.cell(i, j).value
        # scale with the scaler fitted on the training inputs
        destx1 = self.sx.transform(self.destx)
        for i in range(sheet1.nrows):
            # for j in range(sheet1.ncols):
            # activate, then de-normalize back to the original target scale
            testy = self.sy.inverse_transform(
                self.__fnn.activate(destx1[i]).reshape(-1, 1))
            self.desty.append(testy)
            sheetw1.write(i, sheet1.ncols, testy[0][0])
        workbookw1.save(self.destxls)
        # constant reference lines at the training-target extremes
        maxy = max(self.srcy)
        miny = min(self.srcy)
        pmax = []
        pmin = []
        for i in range(sheet1.nrows):
            pmax.append(maxy)
            pmin.append(miny)
        plt.figure()
        plt.subplot(121)
        plt.plot(np.arange(0, sheet1.nrows), pmax, label='max', color='r', linestyle='--')
        plt.plot(np.arange(0, sheet1.nrows), np.array(self.desty).reshape(-1, 1), label='test', color='b', linestyle=':', marker='|')
        plt.plot(np.arange(0, sheet1.nrows), pmin, label='min', color='k', linestyle='--')
        plt.legend()
        plt.xlabel("PointCount")
        plt.ylabel("Rate")
        print('###################################')
        # for i in self.desty:q
        # if i<pmin[0]:
        # print self.desty
        # print pmax[0]
        # print pmin[0]
        # print 'max:' + str(np.maximum(self.desty, pmax[0]))
        npmax = [i for i in self.desty if i > pmax[0]]  # predictions above the band
        # print npmax
        # print len(npmax)
        npin = [i for i in self.desty if (i < pmax[0] and i > pmin[0])]  # inside the band
        # print npin
        # print len(npin)
        npmin = [i for i in self.desty if i < pmin[0]]  # below the band
        # print npmin
        # print len(npmin)
        print(str(float(len(npmin)) / len(self.desty) * 100), '% 小于' + str(pmin[0]))
        self.npin = float(len(npin)) / len(self.desty) * 100
        print(
            str(float(len(npin)) / len(self.desty) * 100) + '% 在所在区间[' + str(pmin[0]) + ',' + str(pmax[0]) + ']中')
        print(
            str(float(len(npmax)) / len(self.desty) * 100) + '% 大于' + str(pmax[0]))
        # print 'min:' + str(np.minimum(self.desty, pmin[0]))
        print('###################################')
        # plt.show()

    def buildBMTrainer(self):
        """Read the training data, build the linear/sigmoid/tanh/sigmoid/
        linear network, train it with backprop, and record de-normalized
        re-activations on the training set in ``self.restest``.
        """
        x, y = self.readexcel()
        # simulate `size` rows of data:
        # self.writeexcel(size=100)
        # resx=contrib(x,0.9)
        # print '**********************'
        # print resx
        # x1=x[:,[3,4,5,6,7,8,9,10,11,0,1,2]]
        # resx1=contrib(x1)
        # print '**********************'
        # print resx1
        self.realy = y
        per = int(len(x))  # all rows are used for training (no hold-out)
        # Normalize the data (with Sigmoid activations normalization is
        # essentially mandatory).
        self.sx = MinMaxScaler()
        self.sy = MinMaxScaler()
        xTrain = x[:per]
        xTrain = self.sx.fit_transform(xTrain)
        yTrain = y[:per]
        yTrain = self.sy.fit_transform(yTrain)
        # initialize the feed-forward network
        self.__fnn = FeedForwardNetwork()
        # Build input, hidden and output layers; 3-5 hidden layers is
        # generally enough — avoid using too many.
        inLayer = LinearLayer(x.shape[1], 'inLayer')
        hiddenLayer0 = SigmoidLayer(int(self.hiddendim / 3), 'hiddenLayer0')
        hiddenLayer1 = TanhLayer(self.hiddendim, 'hiddenLayer1')
        hiddenLayer2 = SigmoidLayer(int(self.hiddendim / 3), 'hiddenLayer2')
        outLayer = LinearLayer(self.rescol, 'outLayer')
        # register the input, hidden and output layers with the network
        self.__fnn.addInputModule(inLayer)
        self.__fnn.addModule(hiddenLayer0)
        self.__fnn.addModule(hiddenLayer1)
        self.__fnn.addModule(hiddenLayer2)
        self.__fnn.addOutputModule(outLayer)
        # fully connect each consecutive pair of layers
        in_to_hidden = FullConnection(inLayer, hiddenLayer0)
        hidden_to_hidden0 = FullConnection(hiddenLayer0, hiddenLayer1)
        hidden_to_hidden1 = FullConnection(hiddenLayer1, hiddenLayer2)
        hidden_to_out = FullConnection(hiddenLayer2, outLayer)
        # register the connections with the network
        self.__fnn.addConnection(in_to_hidden)
        self.__fnn.addConnection(hidden_to_hidden0)
        self.__fnn.addConnection(hidden_to_hidden1)
        self.__fnn.addConnection(hidden_to_out)
        self.__fnn.sortModules()  # finalize topology before training
        # initialize the supervised dataset
        DS = SupervisedDataSet(x.shape[1], self.rescol)
        # add the training samples and labels to DS
        # for i in range(len(xTrain)):
        #     DS.addSample(xTrain[i], yTrain[i])
        for i in range(len(xTrain)):
            DS.addSample(xTrain[i], yTrain[i])
        # Backprop training until convergence; at most 10000 epochs.
        trainer = BMBackpropTrainer(self.__fnn, DS, learningrate=0.0001, verbose=self.verbose)
        if self.myalg:
            trainingErrors = trainer.bmtrain(maxEpochs=10000, verbose=True, continueEpochs=3000,
                                             totalError=0.0001)
        else:
            trainingErrors = trainer.trainUntilConvergence(
                maxEpochs=10000, continueEpochs=3000, validationProportion=0.1)
        # CV = CrossValidator(trainer, DS, n_folds=4, valfunc=ModuleValidator.MSE)
        # CV.validate()
        # CrossValidator
        # trainingErrors = trainer.trainUntilConvergence(maxEpochs=10000,continueEpochs=5000, validationProportion=0.1)
        # self.finalError = trainingErrors[0][-2]
        # self.finalerror=trainingErrors[0][-2]
        # if (self.verbose):
        #     print 'final overall tolerance:', self.finalError
        # snapshot the fitted scalers so saveresult() can persist them
        self.__sy = self.sy
        self.__sx = self.sx
        # re-activate on the training inputs; record de-normalized outputs
        # (the net is activated twice per row here — the first result `a`
        # is unused).
        for i in range(len(xTrain)):
            a = self.sy.inverse_transform(
                self.__fnn.activate(xTrain[i]).reshape(-1, 1))
            self.restest.append(
                self.sy.inverse_transform(
                    self.__fnn.activate(xTrain[i]).reshape(-1, 1))[0][0])
        # print sy.inverse_transform(self.__fnn.activate(xTrain[i]).reshape(-1, 1))
        # sys.exit()
        # print sy.inverse_transform(fnn.activate(x))[0]
        # validate its performance on the test set
        # values = []
        # sy.inverse_transform()
        # for x in xTest:
        #     values.append(sy.inverse_transform(fnn.activate(x))[0])
        # for x in xTest:
        #     x1 = fnn.activate(x)
        #     x2 = sy.inverse_transform(x1.reshape(-1, 1))
        #     values.append(x2[0])
        # print "2"
        # compute RMSE (Root Mean Squared Error)
        # totalsum = sum(map(lambda x: x ** 0.5, map(lambda x, y: pow(x - y, 2), boston.target[per:], values))) / float(len(xTest))
        # print totalsum
        # print "3"

    # persist the trained network and scalers
    def saveresult(self, destname=None):
        """Write the net to <destname>.xml and the fitted scalers to
        <destname>_sy.pkl / <destname>_sx.pkl (defaults to self.destname).
        """
        if destname == None:
            destname = self.destname
        NetworkWriter.writeToFile(self.__fnn, destname + '.xml')
        joblib.dump(self.__sy, destname + '_sy.pkl', compress=3)
        joblib.dump(self.__sx, destname + '_sx.pkl', compress=3)
        # joblib.dump(sx, 'sx.pkl', compress=3)
        # joblib.dump(sy, 'sy.pkl', compress=3)
        # read the saved data back:
        # fnn = NetworkReader.readFrom('BM.xml')
        # sx = joblib.load('sx.pkl')
        # sy = joblib.load('sy.pkl')

    def printresult(self):
        """Dump every module's and connection's parameters to stdout."""
        for mod in self.__fnn.modules:
            print("Module:", mod.name)
            if mod.paramdim > 0:
                print("--parameters:", mod.params)
            for conn in self.__fnn.connections[mod]:
                print("-connection to",
                      conn.outmod.name)
                # conn.whichBuffers
                if conn.paramdim > 0:
                    print("- parameters", conn.params)
        if hasattr(self.__fnn, "recurrentConns"):
            print("Recurrent connections")
            for conn in self.__fnn.recurrentConns:
                print("-", conn.inmod.name, " to", conn.outmod.name)
                if conn.paramdim > 0:
                    print("- parameters", conn.params)

    def getweight(self):
        """Sum |weights| per input neuron for the input-layer connection,
        store them in ``self.weights`` and print a min-max-scaled copy.
        """
        self.weights = []
        for mod in self.__fnn.modules:
            for conn in self.__fnn.connections[mod]:
                print("-connection to", conn.outmod.name)
                if (conn.paramdim > 0) and (conn.inmod.name == 'inLayer'):
                    # NOTE(review): PyBrain stores FullConnection params as
                    # (outdim, indim); reshape(indim, outdim) may transpose
                    # the matrix — confirm the intended orientation.
                    weights1 = conn.params.reshape(conn.indim, conn.outdim)
                    for pw in weights1:
                        dw = 0.0
                        for pw1 in pw:
                            dw += fabs(pw1)
                        self.weights.append(dw)
                    print('weights:', str(self.weights))
                    print("- parameters", conn.params)
        sw = MinMaxScaler()
        sw = sw.fit_transform(
            np.asarray(self.weights, dtype=float).reshape(-1, 1))
        print('sw:', str(sw))

    def printpilt(self, y, realy, savepng='', show=True):
        """Plot predicted vs. true values side by side (subplot 122);
        optionally save the figure as <savepng>.png and/or show it.
        """
        # plt.figure()
        plt.subplot(122)
        plt.plot(np.arange(0, len(y)), y, 'ro--', label='predict number')
        plt.plot(np.arange(0, len(y)), realy, 'ko-', label='true number')
        plt.legend()
        plt.xlabel("PointCount")
        plt.ylabel("Rate")
        if savepng != '':
            plt.savefig(savepng + '.png')
        # plt.get_current_fig_manager().frame.Maximize(True)
        # plt.get_current_fig_manager().full_screen_toggle()
        # plt.get_current_fig_manager().window.state('zoomed')
        if show:
            plt.show()
#outresult = percentError(trainer.testOnClassData(dataset=out), #tstdata['class']) #print " out error: %5.4f%%" % outresult #f = open(r'd:\rrr.csv','w') #for i in range(len(test)): # f.write('%d,%d,%f\n' % (items[i][0],items[i][1],out[i])) #f.close() #pass NetworkWriter.writeToFile(n, 'filename.xml') reader = BinReader(ur'F:\AliRecommendHomeworkData\1212新版\test18.expand.norm.bin') reader.open() result = [0] * reader.LineCount for i in xrange(reader.LineCount): (x,userid,itemid,label) = reader.readline() x[0] = 1 y = n.activate(x)[0] result[i] = (userid,itemid,y) if i % 10000 == 0: print '%d/%d' % (i,reader.LineCount) result.sort(key=lambda x:x[2],reverse=True) result = result[:7000] print ur'正在输出...' with open('result.csv','w') as f: for item in result: f.write('%d,%d\n' % (item[0],item[1])) print ur'阈值:',result[-1][2] print ur'样本总数:',reader.LineCount
class Network:
    """NETwhisperer neural network: maps sliding windows of letters to
    phoneme-trait output layers via a PyBrain feed-forward net.

    NOTE(review): written for Python 2 (iteritems/izip/xrange) and the
    PyBrain library; it will not run unchanged under Python 3.
    """

    def phoneme_to_layer(self, phoneme):
        # Target output-layer activation pattern for a known phoneme.
        return self.phonemes_to_layers[phoneme]

    def layer_to_phoneme(self, layer):
        """Return the known phoneme whose trait layer is closest to
        ``layer`` by cosine similarity."""
        def cos_to_input(item):
            phoneme, phoneme_layer = item
            return _cos(layer,phoneme_layer)
        # minimum angle should be maximum cos
        return max(self.phonemes_to_layers.iteritems(), key=cos_to_input)[0]

    def __init__(self, window_size, window_middle, n_hidden_neurons):
        """window_size: letters per input window; window_middle: index of
        the letter being pronounced within the window; n_hidden_neurons:
        hidden-layer size."""
        self.window_size = window_size
        self.window_middle = window_middle
        self.n_hidden_neurons = n_hidden_neurons
        self.n_trainings = 0        # total epochs trained so far
        self.training_errors = []   # per-epoch training errors
        self._init_layers()
        self._generate_pybrain_network()

    def _init_layers(self):
        """Build the neuron-name/index mappings and per-phoneme target
        layers from the corpus."""
        # one neuron for each window/letter combination
        self.letter_neuron_names = list(product(range(self.window_size), corpus.all_letters))
        # one neuron for each phoneme trait
        self.phoneme_trait_neuron_names = list(corpus.all_phoneme_traits)
        # neuron counts
        self.n_input_neurons = len(self.letter_neuron_names)
        self.n_output_neurons = len(self.phoneme_trait_neuron_names)
        # mapping from (pos, letter) to input neuron index
        self.letters_to_neurons = dict({(pos_and_letter, index) for index, pos_and_letter in enumerate(self.letter_neuron_names)})
        # mapping from trait to neuron
        self.traits_to_neurons = dict({(trait, index) for index, trait in enumerate(self.phoneme_trait_neuron_names)})
        # mapping from phoneme to layer: 1 at each of the phoneme's traits
        self.phonemes_to_layers = {}
        for (phoneme, traits) in corpus.phoneme_traits.iteritems():
            layer = zeros(self.n_output_neurons)
            for trait in traits:
                index = self.traits_to_neurons[trait]
                layer[index] = 1
            self.phonemes_to_layers[phoneme] = layer

    def _generate_pybrain_network(self):
        """Assemble the PyBrain net: linear in -> sigmoid hidden -> linear
        out, with a bias unit feeding both hidden and output layers."""
        # make network
        self._pybrain_network = FeedForwardNetwork()
        # make layers
        self._in_layer = LinearLayer(self.n_input_neurons, name='in')
        self._hidden_layer = SigmoidLayer(self.n_hidden_neurons, name='hidden')
        self._out_layer = LinearLayer(self.n_output_neurons, name='out')
        self._bias_neuron = BiasUnit(name='bias')
        # make connections between layers
        self._in_hidden_connection = FullConnection(self._in_layer, self._hidden_layer)
        self._hidden_out_connection = FullConnection(self._hidden_layer, self._out_layer)
        self._bias_hidden_connection = FullConnection(self._bias_neuron, self._hidden_layer)
        self._bias_out_connection = FullConnection(self._bias_neuron, self._out_layer)
        # add modules to network
        self._pybrain_network.addInputModule(self._in_layer)
        self._pybrain_network.addModule(self._hidden_layer)
        self._pybrain_network.addOutputModule(self._out_layer)
        self._pybrain_network.addModule(self._bias_neuron)
        # add connections to network
        for c in (self._in_hidden_connection, self._hidden_out_connection, self._bias_hidden_connection, self._bias_out_connection):
            self._pybrain_network.addConnection(c)
        # initialize network with added modules/connections
        self._pybrain_network.sortModules()

    def windowIter(self, letters):
        """Yield each window_size slice of ``letters``, space-padded so
        every letter appears once at position window_middle."""
        assert type(letters) == str
        padding_before = ' ' * self.window_middle
        padding_after = ' ' * (self.window_size - self.window_middle - 1)
        padded_letters = padding_before + letters + padding_after
        # for each letter in the sample
        for l_num in range(len(letters)):
            letters_window = padded_letters[l_num:l_num+self.window_size]
            yield letters_window

    def generateSamples(self, letters, phonemes):
        """Yield (input layer, target layer) pairs, one per letter."""
        assert len(letters) == len(phonemes)
        for (letters_window, current_phoneme) in izip(self.windowIter(letters), phonemes):
            yield self.letters_to_layer(letters_window), self.phoneme_to_layer(current_phoneme)

    def letters_to_layer(self, letters):
        """One-hot encode a letter window into an input-layer array."""
        assert len(letters) == self.window_size
        # start with empty layer
        layer = zeros(self.n_input_neurons)
        # loop through letters and activate each neuron
        for (pos, letter) in enumerate(letters):
            index = self.letters_to_neurons[(pos, letter)]
            layer[index] = 1
        return layer

    def train(self, training_set, n_epochs=1, callback=None):
        """Train on (letters, phonemes) pairs for ``n_epochs`` epochs.

        ``callback``, if given, is invoked once before each epoch.  Each
        epoch's error is appended to ``self.training_errors``.
        """
        # build dataset
        dataset = DataSet(self.n_input_neurons, self.n_output_neurons)
        for (ltr,ph) in training_set:
            for sample in self.generateSamples(ltr,ph):
                dataset.addSample(*sample)
        # build trainer
        # NOTE(review): the positional args after the dataset are
        # presumably (learningrate, lrdecay, momentum) for the Trainer
        # alias — confirm against the import.
        trainer = Trainer(self._pybrain_network, dataset, 0.01, 1.0, 0.9)
        for i in xrange(n_epochs):
            # run callback if present
            if callback:
                callback()
            # train network
            error = trainer.train()
            # record training errors
            self.n_trainings = self.n_trainings + 1
            self.training_errors.append(error)

    def getInputHiddenWeights(self):
        # Input->hidden weight matrix, one row per hidden neuron.
        return self._in_hidden_connection.params.reshape((self.n_hidden_neurons, self.n_input_neurons))

    def getHiddenOutputWeights(self):
        # Hidden->output weight matrix, one row per output neuron.
        return self._hidden_out_connection.params.reshape((self.n_output_neurons, self.n_hidden_neurons))

    def getHiddenThresholds(self):
        # Bias weights feeding the hidden layer.
        return self._bias_hidden_connection.params

    def getOutputThresholds(self):
        # Bias weights feeding the output layer.
        return self._bias_out_connection.params

    def lettersToPhonemesWithAngles(self, letters, expected_phonemes):
        """Yield (predicted phoneme, angle to the expected phoneme's
        layer) for each letter."""
        for (window, exp_ph) in izip(self.windowIter(letters), expected_phonemes):
            input_layer = self.letters_to_layer(window)
            output_layer = self._pybrain_network.activate(input_layer)
            phoneme = self.layer_to_phoneme(output_layer)
            angle = _angle(output_layer, self.phoneme_to_layer(exp_ph))
            yield (phoneme, angle)

    def lettersToPhonemes(self, letters):
        """Yield the predicted phoneme for each letter of ``letters``."""
        for window in self.windowIter(letters):
            input_layer = self.letters_to_layer(window)
            output_layer = self._pybrain_network.activate(input_layer)
            phoneme = self.layer_to_phoneme(output_layer)
            yield phoneme

    def addRandomWeights(self, rand_fn):
        """Perturb every in-hidden and hidden-out weight by rand_fn()."""
        cons = (self._in_hidden_connection, self._hidden_out_connection)
        for c in cons:
            for i in xrange(len(c.params)):
                c.params[i] += rand_fn()