class NN2(object):
    def __init__(self, in_layer_size, hidden_layer_size, out_layer_size):
        self.fc1 = Linear(in_layer_size, hidden_layer_size)
        self.ac1 = ReLu()
        self.fc2 = Linear(hidden_layer_size, out_layer_size)

    def forward(self, x):
        s1 = self.fc1.forward(x)
        a1 = self.ac1.forward(s1)
        a2 = self.fc2.forward(a1)
        return a2

    def update(self, params):
        self.fc1.update([params[0]])
        self.fc2.update([params[1]])

    def backward(self, dL_dy2):
        '''
        output dy/dw2 = d(f(wx+b))/dw = x
        output dy/dw1 = d(f(wx+b))/dw = x
        '''
        #dL_ds2 = self.ac2.backward(dL_dy2)
        dL_dy1 = self.fc2.backward(dL_dy2)
        dL_ds1 = self.ac1.backward(dL_dy1)
        dL_dy0 = self.fc1.backward(dL_ds1)
        return dL_dy0

    def param(self):
        return [self.fc1.param()[0], self.fc2.param()[0]]
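# A minimal usage sketch for NN2 (not part of the original source). It assumes the custom
# Linear/ReLu modules from this codebase follow the API visible above: forward/backward
# propagate activations and gradients, param() returns one (weight, gradient) entry per
# layer, and update() receives the new weight tensors. `x`, `target`, `mse_grad` and
# `num_steps` are hypothetical placeholders for the surrounding script's data and settings.
net = NN2(in_layer_size=2, hidden_layer_size=25, out_layer_size=1)
lr = 0.01

for step in range(num_steps):
    pred = net.forward(x)              # fc1 -> ReLu -> fc2
    dL_dy = mse_grad(pred, target)     # gradient of the loss w.r.t. the network output
    net.backward(dL_dy)                # fills each layer's weight gradient
    # plain SGD step, assuming each param() entry is a (weight, gradient) pair
    net.update([w - lr * dw for (w, dw) in net.param()])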
def __init__(self, embed_dim, z_channels, s_channels, num_dilation_layer=10):
    super(Aligner, self).__init__()
    self.embed_dim = embed_dim
    self.z_channels = z_channels
    self.s_channels = s_channels

    self.pre_process = Conv1d(embed_dim, 256, kernel_size=3)
    self.dilated_conv_layers = nn.ModuleList()
    for i in range(num_dilation_layer):
        dilation = 2 ** i
        self.dilated_conv_layers.append(
            DilatedConvBlock(256, 256, z_channels, s_channels, dilation))

    self.post_process = nn.Sequential(
        Linear(256, 256),
        nn.ReLU(inplace=False),
        Linear(256, 1),
        nn.ReLU(inplace=False),
    )
def get_categorical_model(input_neurons, output_neurons, layers=None):
    """
    Creates a model with categorical cross-entropy loss.

    :param input_neurons: number of input neurons
    :param output_neurons: number of output neurons
    :param layers: list of hidden layer sizes; defaults to [25, 25, 25]
    :return: network with categorical cross-entropy loss
    """
    if layers is None:
        layers = [25, 25, 25]
    default_act = 'relu'
    model = Sequential()

    idx = 1
    layers.insert(0, input_neurons)
    while idx < len(layers):
        model.add(Linear(out=layers[idx], input_size=layers[idx - 1], activation=default_act))
        idx += 1
        # model.add(Dropout(prob=0.2))

    model.add(Linear(out=output_neurons, activation='softmax'))

    # Set loss function to model: Sequential object
    ce = LossCrossEntropy()
    model.loss = ce
    return model
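# Hedged usage sketch (not part of the original source): building a 784 -> 10 classifier
# with two hidden layers. The layer sizes are illustrative; get_categorical_model wires up
# ReLU hidden layers, a softmax output layer, and attaches a LossCrossEntropy instance,
# exactly as shown above. How the model is then trained depends on the surrounding framework.
model = get_categorical_model(input_neurons=784, output_neurons=10, layers=[128, 64])
# resulting stack: Linear(784 -> 128, relu) -> Linear(128 -> 64, relu) -> Linear(-> 10, softmax)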
def _read_txt(path):
    print 'loading plain text model from', path

    with open(path, 'rb') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    line = content[c]

    while len(line) > 0:
        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            lineparts = line.split()
            m = int(lineparts[1])
            n = int(lineparts[2])
            mod = Linear(m, n)
            for i in xrange(m):
                c += 1
                mod.W[i, :] = np.array([float(val) for val in content[c].split() if len(val) > 0])

            c += 1
            mod.B = np.array([float(val) for val in content[c].split()])
            modules.append(mod)

        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect())
        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh())
        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax())

        c += 1
        line = content[c]

    return Sequential(modules)
class NN(object):
    def __init__(self, in_layer_size, out_layer_size):
        self.fc1 = Linear(in_layer_size, out_layer_size, bias=False)
        self.ac1 = Tanh()

    def forward(self, x):
        s1 = self.fc1.forward(x)
        a1 = self.ac1.forward(s1)
        return a1

    def update(self, params):
        #print("W:", params[0].shape)
        self.fc1.update([params[0]])
        if len(params) > 1:
            #print("R:", len([params[1]]))
            self.ac1.update(params[1])
        #print("W:",self.fc1.param()[0][0])
        #print("dW:",self.fc1.param()[0][1])

    def backward(self, dL_dy):
        '''
        output dy/dw2 = d(f(wx+b))/dw = x
        output dy/dw1 = d(f(wx+b))/dw = x
        '''
        #print(dL_dy)
        dL_ds = self.ac1.backward(dL_dy)
        dL_dy0 = self.fc1.backward(dL_ds)
        #print(dL_dy0)
        return dL_dy0

    def param(self):
        return [self.fc1.param()[0], self.ac1.param()[0]]
def test_init_not_compatible(self):
    with self.assertRaises(NotCompatibleError):
        model = Sequential([
            Linear(input_size=2, out=22, activation='tanh'),
            # second layer's input_size is not compatible with previous layer output_size
            Linear(input_size=23, out=22, activation='tanh')
        ])
def _read_txt_old(path):
    print('loading plain text model from', path)

    with open(path, 'rb') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    line = content[c]

    while len(line) > 0:
        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            lineparts = line.split()
            m = int(lineparts[1])
            n = int(lineparts[2])
            mod = Linear(m, n)
            for i in range(m):
                c += 1
                mod.W[i, :] = np.array([float(val) for val in content[c].split() if len(val) > 0])

            c += 1
            mod.B = np.array([float(val) for val in content[c].split()])
            modules.append(mod)

        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect())
        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh())
        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax())
        elif line.startswith(BinStep.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(BinStep())
        elif line.startswith(NegAbs.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(NegAbs())
        else:
            raise ValueError('Layer type ' + [s for s in line.split() if len(s) > 0][0]
                             + ' not supported by legacy plain text format.')

        c += 1
        line = content[c]

    return Sequential(modules)
def test_save_model(self):
    """
    :return:
    """
    model = Sequential()
    model.add(Linear(input_size=2, out=24, activation='tanh'))
    model.add(Linear(input_size=24, out=2, activation='tanh'))
    pass
def test_load_model(self):
    """
    :return:
    """
    model = Sequential()
    model.add(Linear(input_size=2, out=24, activation='tanh'))
    model.add(Linear(input_size=24, out=2, activation='tanh'))

    file_name = "model.h5py"
def test_init_not_input_size(self):
    """
    :return:
    """
    with self.assertRaises(InputSizeNotFoundError):
        model = Sequential([
            Linear(out=22, activation='tanh'),  # NO input_size is given
            Linear(input_size=23, out=22, activation='tanh')
        ])
def __init__(
        self,
        d_model: int = 512,          # dimension of model
        input_dim: int = 80,         # dimension of feature vector
        d_ff: int = 2048,            # dimension of feed forward network
        num_layers: int = 6,         # number of encoder layers
        num_heads: int = 8,          # number of attention heads
        ffnet_style: str = 'ff',     # style of feed forward network [ff, conv]
        dropout_p: float = 0.3,      # probability of dropout
        pad_id: int = 0,             # identification of pad token
) -> None:
    super(SpeechTransformerEncoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.pad_id = pad_id
    self.input_proj = Linear(input_dim, d_model)
    self.input_norm = LayerNorm(d_model)
    self.input_dropout = nn.Dropout(p=dropout_p)
    self.positional_encoding = PositionalEncoding(d_model)
    self.layers = nn.ModuleList([
        SpeechTransformerEncoderLayer(d_model, num_heads, d_ff, dropout_p, ffnet_style)
        for _ in range(num_layers)
    ])
def main():
    # optimizer = SGD(lr, weight_decay, mu=mu)
    optimizer = Adam(lr, weight_decay)
    model = ListModel(
        net=[
            Linear(784, 400),
            ReLU(),
            Linear(400, 100),
            ReLU(),
            Linear(100, 10),
            Softmax()
        ],
        loss=CrossEntropyLoss())

    for epoch in range(num_epochs):
        print('epoch number: {}'.format(epoch))
        train(model, optimizer)
        valid(model)
def __init__(self, d_k, d_v, d_model, n_heads, dropout):
    super(MultiHeadAttention, self).__init__()
    self.attention = _MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
    self.proj = Linear(n_heads * d_v, d_model)
    self.dropout = nn.Dropout(dropout)
    self.layer_norm = LayerNormalization(d_model)
def __init__(self, feature_columns, hidden_units, activation='relu',
             dnn_dropout=0., embed_reg=1e-6, w_reg=1e-6):
    """
    Wide&Deep
    :param feature_columns: A list. sparse column feature information.
    :param hidden_units: A list. Neural network hidden units.
    :param activation: A string. Activation function of dnn.
    :param dnn_dropout: A scalar. Dropout of dnn.
    :param embed_reg: A scalar. The regularizer of embedding.
    :param w_reg: A scalar. The regularizer of Linear.
    """
    super(WideDeep, self).__init__()
    self.sparse_feature_columns = feature_columns
    self.embed_layers = {
        'embed_' + str(i): Embedding(input_dim=feat['feat_num'],
                                     input_length=1,
                                     output_dim=feat['embed_dim'],
                                     embeddings_initializer='random_uniform',
                                     embeddings_regularizer=l2(embed_reg))
        for i, feat in enumerate(self.sparse_feature_columns)
    }
    self.index_mapping = []
    self.feature_length = 0
    for feat in self.sparse_feature_columns:
        self.index_mapping.append(self.feature_length)
        self.feature_length += feat['feat_num']
    self.dnn_network = DNN(hidden_units, activation, dnn_dropout)
    self.linear = Linear(self.feature_length, w_reg=w_reg)
    self.final_dense = Dense(1, activation=None)
def train(epochs, batch_size, hidden_size, learning_rate):
    """
    Train a simple feed-forward network to classify MNIST digits, using vanilla SGD
    to minimize the categorical cross entropy between network outputs and ground
    truth labels.
    """
    ff = Sequence(Linear(784, hidden_size), ReLU(),
                  Linear(hidden_size, hidden_size), ReLU(),
                  Linear(hidden_size, 10))
    loss = cross_entropy_loss_with_logits
    loss_grad = cross_entropy_loss_with_logits_grad
    val_set = mnist(val=True)

    def val():
        gen = val_set()
        val_sum = 0.0
        for i, data in enumerate(gen):
            input, label = data
            output = ff.forward(input)
            val_sum += np.argmax(output) == label
        print "Val", val_sum / float(i)

    optim = GradientDescentOptimizer(ff, lr=learning_rate)
    train_set = mnist()

    print "Training .."
    for epoch in xrange(epochs):
        loss_sum = 0.0
        gen = train_set()
        for i, data in enumerate(gen):
            input, label = data
            label = np.array(label, dtype=np.int32)
            output = ff.forward(input)
            ff.backward(loss_grad(label, output))
            if i > 0 and (i % batch_size == 0):
                optim.step()
            loss_sum += loss(label, output)
        print epoch, "Loss", loss_sum / i
        val()
def _affine_backward(self, x, w, b, dout):
    layer = Linear(w.shape[0], w.shape[1])
    layer.weight = w
    layer.bias = b
    tmp = layer.forward(x)
    layer.backward(dout)
    return layer.dx, layer.dw, layer.db
def __init__(self):
    super(FastSpeech2, self).__init__()
    self.encoder = Encoder()
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()
    self.mel_linear = Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.postnet = PostNet()
def _create(self, hidden, k, layer, dropout=None):
    if layer == 1:
        # OrderedDict expects (name, module) pairs
        return OrderedDict([('linear0', Linear(784, 10, 0))])
    d = OrderedDict()
    for i in range(layer):
        if i == 0:
            d['linear' + str(i)] = Linear(784, hidden, k, self.unified)
            d['relu' + str(i)] = nn.ReLU()
            if dropout:
                d['dropout' + str(i)] = nn.Dropout(p=dropout)
        elif i == layer - 1:
            d['linear' + str(i)] = Linear(hidden, 10, 0, self.unified)
        else:
            d['linear' + str(i)] = Linear(hidden, hidden, k, self.unified)
            d['relu' + str(i)] = nn.ReLU()
            if dropout:
                d['dropout' + str(i)] = nn.Dropout(p=dropout)
    return d
def _create(self, hidden, k, layer, dropout=None):
    if layer == 1:
        # OrderedDict expects (name, module) pairs
        return OrderedDict([('linear0', Linear(784, 10, 0))])
    d = OrderedDict()
    for i in range(layer):
        if i == 0:
            # input layer case
            d['linear' + str(i)] = Linear(784, hidden, k, self.unified)
            d['relu' + str(i)] = nn.ReLU()
            if dropout:
                d['dropout' + str(i)] = nn.Dropout(p=dropout)
        elif i == layer - 1:
            # final layer/readout layer.
            d['linear' + str(i)] = Linear(hidden, 10, 0, self.unified)
        else:
            # standard middle layer
            d['linear' + str(i)] = Linear(hidden, hidden, k, self.unified)
            d['relu' + str(i)] = nn.ReLU()
            if dropout:
                d['dropout' + str(i)] = nn.Dropout(p=dropout)
    return d
def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
             max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model,
             share_proj_weight, n_experts=10):
    super(LMTransformer, self).__init__()
    self.decoder = Decoder(n_layers, d_k, d_v, d_model, d_ff, n_heads,
                           max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model)
    self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
    self.weighted_model = weighted_model
    self.head = MoShead(tgt_vocab_size, d_model, self.decoder, share_proj_weight, n_experts)
def __init__(self, d_model: int = 512, d_ff: int = 2048,
             dropout_p: float = 0.3, ffnet_style: str = 'ff') -> None:
    super(PositionWiseFeedForwardNet, self).__init__()
    self.ffnet_style = ffnet_style.lower()
    if self.ffnet_style == 'ff':
        self.feed_forward = nn.Sequential(
            Linear(d_model, d_ff),
            nn.Dropout(dropout_p),
            nn.ReLU(),
            Linear(d_ff, d_model),
            nn.Dropout(dropout_p),
        )
    elif self.ffnet_style == 'conv':
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
    else:
        # the original referenced a nonexistent self.mode attribute here
        raise ValueError("Unsupported mode: {0}".format(self.ffnet_style))
def __init__(
        self,
        num_classes: int,                    # number of classification classes
        max_length: int = 150,               # maximum allowed length for the sequence to be processed
        hidden_dim: int = 1024,              # dimension of RNN`s hidden state vector
        pad_id: int = 0,                     # pad token`s id
        sos_id: int = 1,                     # start of sentence token`s id
        eos_id: int = 2,                     # end of sentence token`s id
        attn_mechanism: str = 'multi-head',  # type of attention mechanism
        num_heads: int = 4,                  # number of attention heads
        num_layers: int = 2,                 # number of RNN layers
        rnn_type: str = 'lstm',              # type of RNN cell
        dropout_p: float = 0.3,              # dropout probability
        device: str = 'cuda'                 # device - 'cuda' or 'cpu'
) -> None:
    super(Speller, self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type, dropout_p, False, device)
    self.num_classes = num_classes
    self.num_heads = num_heads
    self.num_layers = num_layers
    self.max_length = max_length
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.pad_id = pad_id
    self.attn_mechanism = attn_mechanism.lower()
    self.embedding = nn.Embedding(num_classes, hidden_dim)
    self.input_dropout = nn.Dropout(dropout_p)

    if self.attn_mechanism == 'loc':
        self.attention = AddNorm(LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
    elif self.attn_mechanism == 'multi-head':
        self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads), hidden_dim)
    elif self.attn_mechanism == 'additive':
        self.attention = AdditiveAttention(hidden_dim)
    elif self.attn_mechanism == 'scaled-dot':
        self.attention = AddNorm(ScaledDotProductAttention(hidden_dim), hidden_dim)
    else:
        raise ValueError("Unsupported attention: {0}".format(attn_mechanism))

    self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True), hidden_dim)
    self.generator = Linear(hidden_dim, num_classes, bias=False)
def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
             max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model,
             share_proj_weight):
    super(LMTransformer, self).__init__()
    self.decoder = Decoder(n_layers, d_k, d_v, d_model, d_ff, n_heads,
                           max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model)
    self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
    self.weighted_model = weighted_model

    if share_proj_weight:
        print('Sharing target embedding and projection..')
        self.tgt_proj.weight = self.decoder.tgt_emb.weight
def test_Linear(self):
    np.random.seed(42)
    torch.manual_seed(42)
    batch_size, n_in, n_out = 2, 3, 4

    for _ in range(100):
        # layers initialization
        torch_layer = torch.nn.Linear(n_in, n_out)
        custom_layer = Linear(n_in, n_out)
        custom_layer.W = torch_layer.weight.data.numpy()
        custom_layer.b = torch_layer.bias.data.numpy()

        layer_input = np.random.uniform(-10, 10, (batch_size, n_in)).astype(np.float32)
        next_layer_grad = np.random.uniform(-10, 10, (batch_size, n_out)).astype(np.float32)

        # 1. check layer output
        custom_layer_output = custom_layer.updateOutput(layer_input)
        layer_input_var = Variable(torch.from_numpy(layer_input), requires_grad=True)
        torch_layer_output_var = torch_layer(layer_input_var)
        self.assertTrue(np.allclose(torch_layer_output_var.data.numpy(),
                                    custom_layer_output, atol=1e-6))

        # 2. check layer input grad
        custom_layer_grad = custom_layer.updateGradInput(layer_input, next_layer_grad)
        torch_layer_output_var.backward(torch.from_numpy(next_layer_grad))
        torch_layer_grad_var = layer_input_var.grad
        self.assertTrue(np.allclose(torch_layer_grad_var.data.numpy(),
                                    custom_layer_grad, atol=1e-6))

        # 3. check layer parameters grad
        custom_layer.accGradParameters(layer_input, next_layer_grad)
        weight_grad = custom_layer.gradW
        bias_grad = custom_layer.gradb
        torch_weight_grad = torch_layer.weight.grad.data.numpy()
        torch_bias_grad = torch_layer.bias.grad.data.numpy()
        self.assertTrue(np.allclose(torch_weight_grad, weight_grad, atol=1e-6))
        self.assertTrue(np.allclose(torch_bias_grad, bias_grad, atol=1e-6))
def __init__(
        self,
        input_size: int,                  # size of input
        num_classes: int,                 # number of classification classes
        rnn_type='gru',                   # type of RNN cell
        num_rnn_layers: int = 5,          # number of RNN layers
        rnn_hidden_dim: int = 512,        # dimension of RNN`s hidden state
        dropout_p: float = 0.1,           # dropout probability
        bidirectional: bool = True,       # if True, becomes a bidirectional rnn
        activation: str = 'hardtanh',     # type of activation function
        device: str = 'cuda'              # device - 'cuda' or 'cpu'
):
    super(DeepSpeech2, self).__init__()
    self.rnn_layers = list()
    self.device = device

    input_size = int(math.floor(input_size + 2 * 20 - 41) / 2 + 1)
    input_size = int(math.floor(input_size + 2 * 10 - 21) / 2 + 1)
    input_size <<= 5
    rnn_output_size = rnn_hidden_dim << 1 if bidirectional else rnn_hidden_dim

    self.conv = DeepSpeech2Extractor(activation, mask_conv=True)

    for idx in range(num_rnn_layers):
        self.rnn_layers.append(
            BNReluRNN(
                input_size=input_size if idx == 0 else rnn_output_size,
                hidden_dim=rnn_hidden_dim,
                rnn_type=rnn_type,
                bidirectional=bidirectional,
                dropout_p=dropout_p,
                device=device))

    self.fc = nn.Sequential(
        Linear(rnn_output_size, rnn_hidden_dim),
        nn.ReLU(),
        Linear(rnn_hidden_dim, num_classes, bias=False))
def main():
    current_state = STATE_SETUP
    worker_name = 'worker'
    if len(sys.argv) > 1:
        worker_name = sys.argv[1]
    print('Initializing worker ' + worker_name)

    while True:
        answer = socket_adapter.send_message(get_formated_message('setup', current_state), wait_answer=True)
        if answer['key'] == current_state and answer['code'] == code.CODE_OK:
            print("Worker successfully registered")
        else:
            print("Error on setup | message:{}".format(answer))

        learning_parameters = answer['data']
        if learning_parameters and answer['code'] == code.CODE_OK:
            input_size = learning_parameters['input_size']
            output_size = learning_parameters['output_size']
            eta = learning_parameters['eta']
            iterations = learning_parameters['iterations']
            break
        else:
            print("Waiting for setup data")
            time.sleep(2)

    print('Learning parameters are: {}'.format(learning_parameters))

    x, y = generate_data(input_size, output_size)
    X = standardize(x)
    Y = standardize(y)

    model = Linear(X.shape[1], Y.shape[1])
    optim = LossMSE()
    trainer = Trainer(model, optim)

    while True:
        current_state = STATE_LEARNING
        print("Waiting to start learning")
        answer = socket_adapter.send_message(get_formated_message('', current_state), wait_answer=True)
        if answer['code'] == code.CODE_OK:
            print("Start learning")
            break
        time.sleep(2)

    cost = trainer.trainGD(X, Y, iterations, eta=eta, update_func=on_params_update)
    plotCostAndData(model, X, Y, cost, fig_name=worker_name)
def __init__(self, feature_columns, hidden_units, cin_size, dnn_dropout=0,
             dnn_activation='relu', embed_reg=1e-6, cin_reg=1e-6, w_reg=1e-6):
    """
    xDeepFM
    :param feature_columns: A list. sparse column feature information.
    :param hidden_units: A list. a list of dnn hidden units.
    :param cin_size: A list. a list of the number of CIN layers.
    :param dnn_dropout: A scalar. dropout of dnn.
    :param dnn_activation: A string. activation function of dnn.
    :param embed_reg: A scalar. The regularizer of embedding.
    :param cin_reg: A scalar. The regularizer of cin.
    :param w_reg: A scalar. The regularizer of Linear.
    """
    super(xDeepFM, self).__init__()
    self.sparse_feature_columns = feature_columns
    self.embed_dim = self.sparse_feature_columns[0]['embed_dim']
    self.embed_layers = {
        'embed_' + str(i): Embedding(input_dim=feat['feat_num'],
                                     input_length=1,
                                     output_dim=feat['embed_dim'],
                                     embeddings_initializer='random_normal',
                                     embeddings_regularizer=l2(embed_reg))
        for i, feat in enumerate(self.sparse_feature_columns)
    }
    self.index_mapping = []
    self.feature_length = 0
    for feat in self.sparse_feature_columns:
        self.index_mapping.append(self.feature_length)
        self.feature_length += feat['feat_num']
    self.linear = Linear(self.feature_length, w_reg)
    self.cin = CIN(cin_size=cin_size, l2_reg=cin_reg)
    self.dnn = DNN(hidden_units=hidden_units, dnn_dropout=dnn_dropout, dnn_activation=dnn_activation)
    self.cin_dense = Dense(1)
    self.dnn_dense = Dense(1)
    self.bias = self.add_weight(name='bias', shape=(1,), initializer=tf.zeros_initializer())
def _convert_to_nn(self, svm_model, y_train, x_val):
    # convert to linear NN
    print('converting {} model to linear NN'.format(self.__class__.__name__))
    W = svm_model.coef_.T
    B = svm_model.intercept_

    if numpy.unique(y_train).size == 2:
        linear_layer = Linear(W.shape[0], 2)
        linear_layer.W = numpy.concatenate([-W, W], axis=1)
        linear_layer.B = numpy.concatenate([-B, B], axis=0)
    else:
        linear_layer = Linear(*(W.shape))
        linear_layer.W = W
        linear_layer.B = B

    svm_model = self.model
    nn_model = Sequential([Flatten(), linear_layer])
    if not self.use_gpu:
        nn_model.to_numpy()

    # sanity check model conversion
    self._sanity_check_model_conversion(svm_model, nn_model, x_val)
    print('model conversion sanity check passed')
    return nn_model
# normalize inputs
train_input = (train_input - train_input.mean(dim=1)[:, None]) / train_input.std(dim=1)[:, None]
test_input = (test_input - test_input.mean(dim=1)[:, None]) / test_input.std(dim=1)[:, None]

# In[]
# training
overallTestAcc = []
overallTrainAcc = []

for eva in range(evaluateIter):
    # create a model
    model = sequential(Linear(input_size=2, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=2))

    # define criterion and optimizer
    criterion = MSELoss(method='mean')
    optimizer = SGD(model.parameters(), lr=learningRate)

    trainLossList = []
    trainNumList = []
    testLossList = []
    testNumList = []
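# Hedged continuation sketch (not part of the original source): one plausible inner training
# loop for the model built above, assuming the custom sequential/MSELoss/SGD classes follow
# the usual forward/backward/step convention of a small hand-rolled framework. The exact
# signature of criterion.backward() and the names `train_target` and `nbEpochs` are assumptions.
for epoch in range(nbEpochs):
    for b in range(0, train_input.size(0), batchSize):
        batch_input = train_input.narrow(0, b, batchSize)
        batch_target = train_target.narrow(0, b, batchSize)
        output = model.forward(batch_input)             # forward pass through all layers
        loss = criterion.forward(output, batch_target)  # mean squared error on the batch
        model.backward(criterion.backward())            # backpropagate the loss gradient
        optimizer.step()                                # SGD update of every Linear layer
    trainLossList.append(loss)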
def _read_txt_helper(path):
    with open(path, 'rb') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    line = content[c]

    while len(line) > 0:
        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of linear layer
            Linear <rows_of_W> <columns_of_W>
            <flattened weight matrix W>
            <flattened bias vector>
            '''
            _, m, n = line.split()
            m = int(m); n = int(n)
            layer = Linear(m, n)
            layer.W = np.array([float(weightstring) for weightstring in content[c+1].split() if len(weightstring) > 0]).reshape((m, n))
            layer.B = np.array([float(weightstring) for weightstring in content[c+2].split() if len(weightstring) > 0])
            modules.append(layer)
            c += 3  # the description of a linear layer spans three lines

        elif line.startswith(Convolution.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of convolution layer
            Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
            <flattened filter block W>
            <flattened bias vector>
            '''
            _, h, w, d, n, s0, s1 = line.split()
            h = int(h); w = int(w); d = int(d); n = int(n); s0 = int(s0); s1 = int(s1)
            layer = Convolution(filtersize=(h, w, d, n), stride=(s0, s1))
            layer.W = np.array([float(weightstring) for weightstring in content[c+1].split() if len(weightstring) > 0]).reshape((h, w, d, n))
            layer.B = np.array([float(weightstring) for weightstring in content[c+2].split() if len(weightstring) > 0])
            modules.append(layer)
            c += 3  # the description of a convolution layer spans three lines

        elif line.startswith(SumPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of sum pooling layer
            SumPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
            '''
            _, h, w, s0, s1 = line.split()
            h = int(h); w = int(w); s0 = int(s0); s1 = int(s1)
            layer = SumPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(MaxPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of max pooling layer
            MaxPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
            '''
            _, h, w, s0, s1 = line.split()
            h = int(h); w = int(w); s0 = int(s0); s1 = int(s1)
            layer = MaxPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(Flatten.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Flatten()); c += 1  # one line of parameterless layer description
        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect()); c += 1  # one line of parameterless layer description
        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh()); c += 1  # one line of parameterless layer description
        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax()); c += 1  # one line of parameterless layer description
        else:
            raise ValueError('Layer type identifier' + [s for s in line.split() if len(s) > 0][0] + ' not supported for reading from plain text file')

        # skip info of previous layers, read in next layer header
        line = content[c]

    return Sequential(modules)
weights = weights[None, :, :]
weights.transpose(1, 2).shape

input = torch.Tensor([[1, 2, 3, 4, 5],
                      [1, 2, 3, 0, 0],
                      [1, 1, 1, 1, 1]])
bias = torch.Tensor([1, 2, 3, 4])
bias.shape

'''
input = input[:, :, None]
weights.matmul(input).squeeze() + bias'''

lin = Linear(5, 4, ReLU())
output = lin.forward(input)

target = torch.Tensor([[0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 0]])
d_loss = dloss(output, target)
prev_dl_dx = lin.backward(d_loss)
prev_dl_dx.shape

ex_dloss = torch.Tensor([[.1, .2, .2, .1],
                         [.1, .2, .2, .1],
                         [.1, .2, .2, .1]])
def _read_txt_helper(path):
    with open(path, 'rb') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    line = content[c]

    while len(line) > 0:
        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of linear layer
            Linear <rows_of_W> <columns_of_W>
            <flattened weight matrix W>
            <flattened bias vector>
            '''
            _, m, n = line.split()
            m = int(m)
            n = int(n)
            layer = Linear(m, n)
            layer.W = np.array([
                float(weightstring)
                for weightstring in content[c + 1].split()
                if len(weightstring) > 0
            ]).reshape((m, n))
            layer.B = np.array([
                float(weightstring)
                for weightstring in content[c + 2].split()
                if len(weightstring) > 0
            ])
            modules.append(layer)
            c += 3  # the description of a linear layer spans three lines

        elif line.startswith(Convolution.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of convolution layer
            Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
            <flattened filter block W>
            <flattened bias vector>
            '''
            _, h, w, d, n, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            d = int(d)
            n = int(n)
            s0 = int(s0)
            s1 = int(s1)
            layer = Convolution(filtersize=(h, w, d, n), stride=(s0, s1))
            layer.W = np.array([
                float(weightstring)
                for weightstring in content[c + 1].split()
                if len(weightstring) > 0
            ]).reshape((h, w, d, n))
            layer.B = np.array([
                float(weightstring)
                for weightstring in content[c + 2].split()
                if len(weightstring) > 0
            ])
            modules.append(layer)
            c += 3  # the description of a convolution layer spans three lines

        elif line.startswith(SumPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of sum pooling layer
            SumPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
            '''
            _, h, w, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            s0 = int(s0)
            s1 = int(s1)
            layer = SumPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(MaxPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            '''
            Format of max pooling layer
            MaxPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
            '''
            _, h, w, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            s0 = int(s0)
            s1 = int(s1)
            layer = MaxPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(Flatten.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Flatten())
            c += 1  # one line of parameterless layer description
        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect())
            c += 1  # one line of parameterless layer description
        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh())
            c += 1  # one line of parameterless layer description
        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax())
            c += 1  # one line of parameterless layer description
        else:
            raise ValueError(
                'Layer type identifier' +
                [s for s in line.split() if len(s) > 0][0] +
                ' not supported for reading from plain text file')

        # skip info of previous layers, read in next layer header
        line = content[c]

    return Sequential(modules)