def momentum_train(self, train_x: np.ndarray, train_y: np.ndarray,
                   alpha=1e-01, beta=5e-02, epsilon=0, tresh=1e-02,
                   max_epochs=300, reset=True):
    """
    Trains the network using classical momentum.
    alpha:      learning rate
    beta:       acceleration (momentum) coefficient
    epsilon:    regularization coefficient
    tresh:      threshold on the relative gradient norm used to exit the main training loop
    max_epochs: maximum number of epochs to be done
    """
    # bookkeeping for statistics
    init_time = time.perf_counter()
    grad_norms = []
    errors = []
    times = []

    def statistics(gradient_norm):
        now = time.perf_counter()
        times.append(now - init_time)
        grad_norms.append(gradient_norm)
        e = self.test_loss(train_x, train_y, epsilon)
        errors.append(e)
        print(gradient_norm, e)
        clear_output(wait=True)

    statistics(0)

    # epochs of training executed so far
    epoch = 0

    # local alias for gradient computation
    compute_gradient = self.compute_gradient

    # previous velocity (v_t) for momentum computation (placeholder)
    old_d = np.array([np.zeros(self.w[i].shape) for i in range(self.Nl + 1)], dtype=object)

    # main loop: compute velocity and update weights
    gradient_norm = np.inf  # placeholder for gradient norm
    # initial norm of the gradient, used for the stopping criterion
    init_gradient_norm = np.linalg.norm(myflatten(compute_gradient(train_x, train_y, epsilon)))

    while (gradient_norm / init_gradient_norm) > tresh and epoch < max_epochs:
        if reset and epoch % 50 == 0:
            print('reset')
            old_d = np.array([np.zeros(self.w[i].shape) for i in range(self.Nl + 1)], dtype=object)

        # compute gradient ( \nabla loss(w_t) ) and its norm; the gradient is generally
        # a collection of matrices, so it is flattened into a vector before taking the norm
        g = compute_gradient(train_x, train_y, epsilon)
        gradient_norm = np.linalg.norm(myflatten(g))

        # compute velocity d
        d = -alpha * g + beta * old_d

        # update weights and previous velocity
        self.w = self.w + d
        self.w = set_zeros(self.w)
        old_d = d

        # update epoch counter and collect statistics
        epoch += 1
        statistics(gradient_norm / init_gradient_norm)

    return grad_norms, errors, times, epoch
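# Minimal standalone sketch of the same classical-momentum update applied to a toy
# quadratic loss f(w) = 0.5 * ||w||^2. Everything here (the toy loss, the alpha and
# beta values, the iteration count) is illustrative and not part of the class above.
import numpy as np

w = np.array([3.0, -2.0])          # weights
old_d = np.zeros_like(w)           # previous velocity v_t
alpha, beta = 0.1, 0.05            # learning rate, momentum coefficient
for _ in range(100):
    g = w                          # gradient of 0.5*||w||^2 is w itself
    d = -alpha * g + beta * old_d  # velocity: same rule as in momentum_train
    w = w + d                      # weight update
    old_d = d
print(np.linalg.norm(w))           # the norm shrinks toward 0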
def dump(self, rows=None, cols=None):
    """show table"""
    if rows is None:
        rows = ones(len(self._row_names), dtype=bool)
    elif isinstance(rows, str):
        rows = self.pattern(rows)
    rows = where(rows)[0]
    if cols is None:
        colsn = list(myflatten(self._col_groups))
        cols = [getattr(self, n) for n in colsn]
    if isinstance(cols, str):
        colsn = cols.split()
        cols = [self._eval(n) for n in colsn]
    out = []
    rowfmt = ['%%-%d.%ds' % (self._name_char, self._name_char)]
    rowfmt += ['%%-%d.%ds' % (self._entry_char, self._name_char)] * len(colsn)
    rowfmt = ' '.join(rowfmt)
    out.append(rowfmt % tuple(['name'] + colsn))
    if len(rows) == 1:
        i = rows[0]   # index with the selected row, not row 0
        v = [self._row_names[i]] + [_mystr(c[i], self._entry_char) for c in cols]
        out.append(rowfmt % tuple(v))
    else:
        for i in rows:
            v = [self._row_names[i]] + [_mystr(c[i], self._entry_char) for c in cols]
            out.append(rowfmt % tuple(v))
    return '\n'.join(out)
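# Usage sketch for dump(): the table construction below follows the sbend_to_rbend
# doctest further down; the row pattern '.' (all rows) and the column string are
# illustrative, not a prescribed calling convention.
t = trtable('q b f f b q')
t.b.l = 1; t.b.kn0l = 1
print(t.dump('.', 'l kn0l'))   # aligned text table with columns: name, l, kn0l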
def sbend_to_rbend(self, sel):
    """Add edge focusing to selection

    >>> t=trtable('q b f f b q')
    >>> t.b.l=1; t.b.kn0l=1; t.l[2]=0
    >>> t2=t.sbend_to_rbend(t//'b')
    >>> t2.dump('.','l kn1l')
    name       l            kn1l
    start      0.           0.
    q          0.           0.
    b_edge     0.           0.
    b          0.           0.
    b_edge     0.           0.
    f          0.           0.
    f          0.           0.
    b_edge     0.           546.302e-03
    b          1.000        0.
    b_edge     0.           546.302e-03
    q          0.           0.
    end        0.           0.
    """
    sel = (where(sel)[0]).tolist()
    names = self._row_names
    new_names = names[:]
    for i in sel:
        new_names[i] = [names[i] + '_edge', names[i], names[i] + '_edge']
    new_names = list(myflatten(new_names))
    edges = zeros(len(new_names), dtype=bool)
    sbends = zeros(len(new_names), dtype=bool)
    for i in range(len(sel)):
        edges[sel[i] + i * 2] = True
        sbends[sel[i] + i * 2 + 1] = True
        edges[sel[i] + i * 2 + 2] = True
    new_t = trtable(new_names, inc_start=False, inc_stop=False)
    for cname in myflatten(self._col_groups):
        col = getattr(new_t, cname)
        col[~edges] = getattr(self, cname)   # copy the old columns into the non-edge rows
    angle = new_t.kn0l[sbends]
    l = new_t.l[sbends]
    pos = l > 0
    kick = ((angle / l) * tan(angle / 2))[pos]
    idx = where(edges)[0][0::2][pos]
    new_t.kn1l[idx] = kick
    new_t.kn1l[idx + 2] = kick
    return new_t
def split(self, n):
    oldnames = array(self._row_names[1:-1])   # remove start/end rows
    r = len(oldnames)
    newnames = zeros(r * n, dtype=oldnames.dtype)
    for i in range(n):
        newnames[i::n] = oldnames
    t = trtable(newnames.tolist(), full=self._full)
    for c in myflatten(self._col_groups):
        for i in range(n):
            getattr(t, c)[i + 1:-1:n] = getattr(self, c)[1:-1]
        getattr(t, c)[0] = getattr(self, c)[0]
        getattr(t, c)[-1] = getattr(self, c)[-1]
    t.l = t.l / n
    t.kn1l = t.kn1l / n
    t.ks1l = t.ks1l / n
    t.kn0l = t.kn0l / n
    t.ks0l = t.ks0l / n
    return t
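# Usage sketch for split(): assumes the same trtable constructor as in the doctest above;
# the element names and strengths here are illustrative.
t = trtable('q b f f b q')
t.b.l = 1; t.b.kn0l = 1
t2 = t.split(2)                 # each element becomes 2 slices
print(t2.dump('.', 'l kn0l'))   # slice lengths and integrated strengths are halved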
def add_rows(self, row_names, idx=None):
    if isinstance(row_names, str):   # accept a space-separated string of names
        row_names = row_names.split()
    if idx is None:
        idx = len(self)
    lrow = len(row_names)
    if lrow > 0:
        self._row_names.insert(idx, row_names)
        self._row_names = list(myflatten(self._row_names))
        new_data = []
        for d in self._data:
            newd = zeros((lrow, d.shape[1]), dtype=d.dtype)
            new_data.append(r_[d[:idx], newd, d[idx:]])
        self._data = new_data
        for cnames, d in zip(self._col_groups, self._data):
            for cname in cnames:
                self.__dict__[cname].data = d
        self._row_index = _mkidx(self._row_names)
        if self._full:
            self._mkfull()
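# Usage sketch for add_rows(): the names and insertion index are illustrative; new rows
# are zero-filled in every column group and the row index is rebuilt afterwards.
t = trtable('q b f f b q')
t.add_rows('m1 m2', idx=2)   # insert two empty rows at position 2 (just after 'q')
print(t.dump())              # the new rows show up with all columns equal to 0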
import time

import numpy as np
from numpy import tanh
from scipy.special import softmax
from IPython.display import clear_output   # used by the training loops for live statistics

# activation functions
ide = lambda x: np.copy(x)
relu = lambda x: x * (x > 0)

# loss functions
squared_error = lambda y, d: np.linalg.norm((y - d).flatten()) ** 2
cross_entropy = lambda y, d: -np.sum(d * np.log(y + np.finfo(float).eps))
MSE = lambda y, d: np.mean(np.square(y - d))

# norm regularization (a naive rewrite of numpy's l1-norm so that it accepts an array of
# matrices of (possibly) different sizes; the active version returns the l1 norm of all
# weights flattened together)
#l1 = lambda x: np.array([np.max(np.sum(np.abs(w), axis=0)) for w in x])
l1 = lambda x: np.linalg.norm(myflatten(x).reshape(-1), ord=1)

def derivative(f):
    """
    When f is an activation function, returns its derivative w.r.t. the potential of the unit.
    When f is a loss, returns its derivative w.r.t. the activation of the last layer's units.
    When f is a norm, returns its derivative w.r.t. the weights.
    (When f is cross_entropy and the activation of the output units is softmax,
    the maths say the derivative of the loss w.r.t. the potential is one.)
    """
    if f == tanh:
        return lambda x: 1.0 - tanh(x) ** 2
    elif f == relu:
        return lambda x: 1 * (x >= 0)
    elif f == ide or f == softmax:
        return lambda x: x - x + 1
    elif f == squared_error or f == MSE or f == cross_entropy: