def run_one_epoch(self, epoch, train_data, batch_size, weight_decay,
                  floss, optimizer, lr_scheduler):
    fdloss = utils.lookup_dfunc(floss)
    n = len(train_data)
    total_loss = 0
    total_succ = 0
    for batch in utils.break_into_batches(train_data, batch_size, shuffle=True):
        X = np.array([x for x, _ in batch])
        Y_expect = np.array([y for _, y in batch])
        # forward
        Xw = np.dot(X, self.w)
        Xwpb = Xw + self.b
        Y = self.fact(Xwpb)
        Loss = floss(Y, Y_expect)
        # backward
        dY = fdloss(Loss, Y, Y_expect)
        dXwpb = self.fdact(dY, Y, Xwpb)
        dXw = dXwpb
        db = np.sum(dXwpb, axis=0)
        dw = utils.ddot1(dXw, Xw, X, self.w)
        # metrics
        total_loss += np.sum(Loss)
        total_succ += np.sum(Y_expect[range(Y_expect.shape[0]), np.argmax(Y, axis=-1)])
        # optimize
        lr = lr_scheduler(epoch)
        optimizer([self.w, self.b], [dw, db], n, batch_size, lr, weight_decay)
    return total_succ / n, total_loss / n
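# Note: `utils.break_into_batches` is not shown in these listings. A minimal
# sketch of what it could look like, assuming it shuffles the data once per
# epoch and yields fixed-size slices (the name and behaviour here are an
# assumption, not the original implementation):
import random

def break_into_batches(data, batch_size, shuffle=False):
    data = list(data)
    if shuffle:
        random.shuffle(data)                   # randomize sample order once per epoch
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]   # last batch may be smaller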
def run_one_epoch(self, epoch, train_data, batch_size, weight_decay,
                  floss, optimizer, lr_scheduler):
    fdloss = utils.lookup_dfunc(floss)
    n = len(train_data)
    total_loss = 0
    total_succ = 0
    for batch in utils.break_into_batches(train_data, batch_size, shuffle=True):
        X = np.array([x for x, _ in batch])
        Y_expect = np.array([y for _, y in batch])
        # forward
        act = X
        actws = []
        actwbs = []
        acts = [act]
        for w, b, fact in zip(self.weights, self.biases, self.facts):
            actw = np.dot(act, w)
            actwb = actw + b
            act = fact(actwb)
            actws.append(actw)
            actwbs.append(actwb)
            acts.append(act)
        Y = act
        Loss = floss(Y, Y_expect)
        # backward
        dws = []
        dbs = []
        dact = fdloss(Loss, Y, Y_expect)
        for actwb, actw, prev_act, w, fdact in reversed(
                list(zip(actwbs, actws, acts, self.weights, self.fdacts))):
            dactwb = fdact(dact, act, actwb)
            dactw = dactwb
            db = np.sum(dactwb, axis=0)
            dw = utils.ddot1(dactw, actw, prev_act, w)
            dact = utils.ddot0(dactw, actw, prev_act, w)
            act = prev_act
            dws.append(dw)
            dbs.append(db)
        dws.reverse()
        dbs.reverse()
        # metrics
        total_loss += np.sum(Loss)
        total_succ += np.sum(Y_expect[range(Y_expect.shape[0]), np.argmax(Y, axis=-1)])
        # optimize
        lr = lr_scheduler(epoch)
        optimizer(self.weights + self.biases, dws + dbs, n, batch_size, lr, weight_decay)
    return total_succ / n, total_loss / n
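# `utils.ddot0` and `utils.ddot1` are also not shown. From the call sites above
# they appear to back-propagate through C = np.dot(A, B): ddot0 returns the
# gradient w.r.t. the first operand A, ddot1 the gradient w.r.t. the second
# operand B. A minimal sketch under that assumption (the extra `C` and unused
# operand arguments are kept only to match the calling convention):
import numpy as np

def ddot0(dC, C, A, B):
    # dL/dA = dL/dC . B^T
    return np.dot(dC, B.T)

def ddot1(dC, C, A, B):
    # dL/dB = A^T . dL/dC
    return np.dot(A.T, dC)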
def run_one_epoch(self, epoch, train_data, batch_size, weight_decay,
                  floss, optimizer, lr_scheduler):
    fdloss = utils.lookup_dfunc(floss)
    params = [param for layer in self.all_layers() for param in layer.params]
    n = len(train_data)
    total_loss = 0
    total_acc = 0
    for batch in utils.break_into_batches(train_data, batch_size, shuffle=True):
        batch_grads = []
        for x, y_expect in batch:
            # forward
            y = self.forward(x)
            loss = floss(y, y_expect)
            # backward
            dy = fdloss(loss, y, y_expect)
            self.backward(dy, y, x)
            # update grad
            grads = [grad for layer in self.all_layers() for grad in layer.params_grad]
            if not batch_grads:
                batch_grads = grads
            else:
                for batch_grad, grad in zip(batch_grads, grads):
                    batch_grad += grad
            # metrics
            total_loss += loss
            total_acc += y_expect[np.argmax(y)] == 1
        # optimize
        lr = lr_scheduler(epoch)
        optimizer(params, batch_grads, n, batch_size, lr, weight_decay)
    return total_acc / n, total_loss / n
def run_one_epoch(self, epoch, train_data, batch_size, weight_decay,
                  floss, optimizer, lr_scheduler):
    fdloss = utils.lookup_dfunc(floss)
    n = len(train_data)
    total_loss = 0
    total_acc = 0
    for batch in utils.break_into_batches(train_data, batch_size, shuffle=True):
        grad_w = np.zeros(self.w.shape)
        grad_b = np.zeros(self.b.shape)
        for x, y_expect in batch:
            # forward
            wx = np.dot(self.w, x)
            wxpb = wx + self.b
            y = self.fact(wxpb)
            loss = floss(y, y_expect)
            # backward
            dy = fdloss(loss, y, y_expect)
            dwxpb = self.fdact(dy, y, wxpb)
            dwx = dwxpb
            db = dwxpb
            dw = utils.ddot0(dwx.reshape((-1, 1)), wx, self.w, x.reshape((-1, 1)))
            # update grad
            grad_w += dw
            grad_b += db
            # metrics
            total_loss += loss
            total_acc += y_expect[np.argmax(y)] == 1
        # optimize
        lr = lr_scheduler(epoch)
        optimizer([self.w, self.b], [grad_w, grad_b], n, batch_size, lr, weight_decay)
    return total_acc / n, total_loss / n
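# `utils.lookup_dfunc` maps a forward function (loss or activation) to its
# derivative function. One plausible sketch is a simple registry; the real
# implementation is not shown, so the names below are an assumption:
_DFUNCS = {}

def register_dfunc(func, dfunc):
    _DFUNCS[func] = dfunc

def lookup_dfunc(func):
    return _DFUNCS[func]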
def __init__(self, fact):
    self.fact = fact
    self.fdact = utils.lookup_dfunc(fact)
def __init__(self, sizes, facts):
    self.sizes = sizes
    self.facts = facts
    self.fdacts = [utils.lookup_dfunc(fact) for fact in facts]
def __init__(self, output_size, fact):
    self.output_size = output_size
    self.fact = fact
    self.fdact = utils.lookup_dfunc(fact)
def __init__(self, fit_chan, fit_rect, fit_stride, fact):
    self.fit_chan = fit_chan
    self.fit_rect = fit_rect
    self.fit_stride = fit_stride
    self.fact = fact
    self.fdact = utils.lookup_dfunc(fact)
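# How the pieces above could be wired together. The optimizer below is a plain
# SGD step with L2 weight decay that matches the calling convention
# optimizer(params, grads, n, batch_size, lr, weight_decay); note the grads
# passed in are sums over the batch, hence the division by batch_size.
# `sgd`, `model`, `cross_entropy` and the hyperparameter values are
# hypothetical, not part of the original code.
def sgd(params, grads, n, batch_size, lr, weight_decay):
    # Update in place so the arrays referenced by the model are modified.
    for param, grad in zip(params, grads):
        param *= 1 - lr * weight_decay / n    # L2 weight decay
        param -= (lr / batch_size) * grad     # step with the batch-averaged gradient


for epoch in range(30):
    acc, loss = model.run_one_epoch(
        epoch, train_data, batch_size=32, weight_decay=5.0,
        floss=cross_entropy, optimizer=sgd,
        lr_scheduler=lambda e: 0.1)           # constant learning rate
    print(f"epoch {epoch}: acc={acc:.4f} loss={loss:.4f}")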