def __init__(self, shape=()):
    """Create the chain with one parameter ``w`` of the given shape.

    Args:
        shape (tuple): Shape of the parameter; default is a scalar.
    """
    super(SimpleChain, self).__init__()
    # Initial weights drawn from a standard normal distribution.
    initial_w = np.asarray(np.random.randn(*shape)).astype(np.float32)
    with self.init_scope():
        self.w = chainer.Parameter(initial_w, name='w')
def test_assign_param_in_init_scope(self):
    """A parameter assigned inside ``init_scope`` must be registered."""
    param = chainer.Parameter()
    with self.c1.init_scope():
        self.c1.p = param
    self.assertIn(param, self.c1.params())
def __init__(self):
    """Create a link holding a single ``bias`` parameter of shape (1,)."""
    super().__init__()
    with self.init_scope():
        # Zero-initialized bias.
        self.bias = chainer.Parameter(0, shape=1)
def setUp(self):
    """Prepare a bare link with one uninitialized parameter ``w``."""
    self.target = chainer.Link()
    with self.target.init_scope():
        self.target.w = chainer.Parameter()
def __init__(self, w, g):
    """Create a link whose parameter has data ``w`` and gradient ``g``."""
    super(SimpleLink, self).__init__()
    with self.init_scope():
        self.param = chainer.Parameter(w)
        self.param.grad = g
initial_p) return link @_inject_backend_tests @pytest.mark.xfail(strict=True, raises=TypeError) class TestLinkIncorrectCreateLink(DotLinkTestBase, testing.LinkTestCase): def create_link(self, initializers): # Invalid return type (that is not an instance of chainer.Link). return numpy.array([1]) @testing.parameterize(*testing.product({ 'invalid_forward_backward_initializer': [ chainer.Variable(numpy.array([1])), chainer.Parameter(numpy.array([1])), ] })) @_inject_backend_tests @pytest.mark.xfail(strict=True, raises=TypeError) class TestLinkIncorrectForwardBackwardInitializers(DotLinkTestBase, testing.LinkTestCase): def generate_params(self): return self.invalid_forward_backward_initializer, @_inject_backend_tests @pytest.mark.xfail(strict=True, raises=testing.LinkTestError) class TestLinkIncorrectBackwardInitializers(DotLinkTestBase, testing.LinkInitializersTestCase): def create_link(self, initializers):
def __init__(self, model):
    """Wrap ``model`` and add one trainable weight per task (all ones).

    Args:
        model: Underlying model; must expose an ``n_task`` attribute.
    """
    super(RegressionTrainChain, self).__init__()
    with self.init_scope():
        self.model = model
        self.weight = chainer.Parameter(
            np.ones(model.n_task, dtype=np.float32))
def preprocess_caffemodel(self, net):
    """Apply preprocess to a CaffeFunction object.

    It merges a Scale layer that runs right after a BN layer. A Caffe BN
    layer only has normalization statistics (mean and var), but the BN
    operation also needs scale and shift parameters; in Caffe those are
    supplied by a succeeding Scale layer. In Chainer both the
    normalization statistics and the scale/shift parameters can be stored
    in a single BN object, so this function moves the Scale layer's
    parameters into the BN layer and removes the Scale layer. It has to
    be called before running net.__call__().

    Args:
        net (chainer.links.caffe.caffeFunction): CaffeFunction object to
            be retrieved. Layers will be modified.
    """
    BatchNormalization = \
        chainer.links.normalization.batch_normalization.BatchNormalization
    Scale = chainer.links.connection.scale.Scale
    if self.verbose:
        print("Applying caffemodel preprocessing to the network")
    # Algorithm:
    # There are 2 patterns of mergeable BN+Scale.
    # 1. Both BN and Scale refer the same src (ResNet caffemodel pattern)
    # 2. Scale refers BN (DenseNet caffemodel patten)
    # For pattern 1: map common source name -> index of the BN layer.
    bn_scale_common_sources = dict()
    bns = dict()
    scale_indices = []
    for i, (func_name, bottoms, _) in enumerate(net.layers):
        if not hasattr(net, func_name):
            # A function with no parameters (e.g. ReLU) doesn't exist in
            # `net` as an attribute, but BN and Scale do.
            continue
        layer = getattr(net, func_name)
        # Find sources that are referred by both BN and Scale layers.
        for src_name in bottoms:
            # In net.layers, BN always comes first and Scale comes next.
            if type(layer) is BatchNormalization:
                # For pattern 1: mark the source `bottom` as referred by BN.
                bn_scale_common_sources[src_name] = i
                # For pattern 2: mark the layer as BN.
                bns[func_name] = i
            elif type(layer) is Scale:
                if bn_scale_common_sources.get(src_name) is not None:
                    # Pattern 1 found: this source `bottom` is already
                    # marked, so its marker is the BN layer.
                    # BUG FIX: the original code reassigned the loop
                    # variable ``i`` to the BN layer's index here, so the
                    # BN layer (not this Scale layer) was scheduled for
                    # deletion below. Use a separate name and delete the
                    # current Scale layer's index ``i`` instead.
                    bn_index = bn_scale_common_sources[src_name]
                    bn_layer_name, _, _ = net.layers[bn_index]
                    bn = getattr(net, bn_layer_name)
                    bn.gamma = chainer.Parameter(layer.W.data)
                    if hasattr(layer, 'bias'):
                        bn.beta = chainer.Parameter(layer.bias.b.data)
                    scale_indices.append(i)
                    if self.verbose:
                        print('Mergeable BN detected '
                              '(BN:{}/Scale:{} both refer {})'.format(
                                  bn_layer_name, func_name, src_name))
                elif src_name in bns:
                    # Pattern 2 found: this Scale layer refers a BN layer.
                    bn = getattr(net, src_name)
                    bn.gamma = chainer.Parameter(layer.W.data)
                    if hasattr(layer, 'bias'):
                        bn.beta = chainer.Parameter(layer.bias.b.data)
                    scale_indices.append(i)
                    if self.verbose:
                        print('Mergeable BN detected '
                              '(Scale:{} refers BN:{})'.format(
                                  func_name, src_name))
    # Remove scale layers that are no longer necessary.  Delete from the
    # back so earlier indices stay valid.
    for i in sorted(scale_indices, reverse=True):
        del net.layers[i]
def __init__(self, dim_in=512, scaling=1.0, use_bn=True,
             use_up_sampling=False, symmetric=False):
    """Build the convolutional shape decoder.

    Args:
        dim_in (int): Dimensionality of the input feature vector.
        scaling (float): Output scaling factor.
        use_bn (bool): Insert BatchNormalization layers when True.
        use_up_sampling (bool): Use plain convolutions (for external
            up-sampling) instead of transposed convolutions when True.
        symmetric (bool): Enforce a symmetric base shape when True.
    """
    super(ConvolutionShapeDecoder, self).__init__()
    self.grid_size = 16
    self.obj_scale = 0.5
    self.tanh_scale = 1.2
    self.scaling = scaling
    self.use_up_sampling = use_up_sampling
    self.symmetric = symmetric

    # Create base shape & faces and transforming matrices.
    self.vertices_base = None
    self.vertices_matrix = None
    self.num_vertices = None
    self.symmetric_matrix = None
    self.faces = None
    self.degrees = None
    self.init_vertices_base()
    self.init_faces()
    self.laplacian = get_graph_laplacian(self.faces, self.num_vertices)
    self.normalize_vertices_base()

    # Initialize the NN layers.
    with self.init_scope():
        dims = [512, 256, 128, 64, 3]
        weight_init = chainer.initializers.HeNormal()
        built = {}
        if use_bn:
            Normalization = cl.BatchNormalization
            no_bias = True
        else:
            Normalization = layers.DummyLayer
            no_bias = False
        for part in range(6):
            built['linear_p%d_1' % part] = cl.Linear(
                dim_in, dims[0] * 4, initialW=weight_init, nobias=no_bias)
            if not use_up_sampling:
                # Transposed convolutions: 2x2 -> 4x4 -> 8x8 -> 16x16.
                built['conv_p%d_1' % part] = cl.Deconvolution2D(
                    dims[0], dims[1], 3, 2, 1, outsize=(4, 4),
                    initialW=weight_init, nobias=no_bias)
                built['conv_p%d_2' % part] = cl.Deconvolution2D(
                    dims[1], dims[2], 3, 2, 1, outsize=(8, 8),
                    initialW=weight_init, nobias=no_bias)
                built['conv_p%d_3' % part] = cl.Deconvolution2D(
                    dims[2], dims[3], 3, 2, 1, outsize=(16, 16),
                    initialW=weight_init, nobias=no_bias)
            else:
                # Plain convolutions; spatial growth happens elsewhere.
                built['conv_p%d_1' % part] = cl.Convolution2D(
                    dims[0], dims[1], 3, pad=1,
                    initialW=weight_init, nobias=no_bias)
                built['conv_p%d_2' % part] = cl.Convolution2D(
                    dims[1], dims[2], 3, pad=1,
                    initialW=weight_init, nobias=no_bias)
                built['conv_p%d_3' % part] = cl.Convolution2D(
                    dims[2], dims[3], 3, pad=1,
                    initialW=weight_init, nobias=no_bias)
            built['conv_p%d_4' % part] = cl.Convolution2D(
                dims[3], dims[4], 3, 1, 1, initialW=weight_init)
            built['linear_p%d_1_bn' % part] = Normalization(dims[0])
            built['conv_p%d_1_bn' % part] = Normalization(dims[1])
            built['conv_p%d_2_bn' % part] = Normalization(dims[2])
            built['conv_p%d_3_bn' % part] = Normalization(dims[3])
        for attr_name, link in built.items():
            setattr(self, attr_name, link)
        # Promote the precomputed base vertices to a trainable parameter.
        self.vertices_base = chainer.Parameter(self.vertices_base)
def __init__(self):
    """Link with one uninitialized and one initialized parameter."""
    chainer.Link.__init__(self)
    with self.init_scope():
        self.p1 = chainer.Parameter()
        self.p2 = chainer.Parameter(numpy.array([1, 2], numpy.float32))
def __init__(self, z):
    """Register ``z`` as this link's single parameter."""
    super(Z_link, self).__init__()
    with self.init_scope():
        self.z = chainer.Parameter(z)
def main():
    """Resume a snapshot of the fMRI autoencoder and fine-tune it.

    Loads the experiment config, confirms interactively, builds the
    model/dataset/optimizer/trainer from the config, restores the
    snapshot, swaps the PCA head for fixed iPCA parameters, switches the
    objective to reconstruction, and runs training.
    """
    experiment_name = "Stacked_16_16_16_16"
    snapshot_name = "snapshot_iter_27215"
    config_path = "/efs/fMRI_AE/{}/log/config.yml".format(experiment_name)
    config = load_config(config_path)

    # Resolve the mask path and crop settings inside the loaded config.
    info = config["additional information"]
    mask_loader = info["mask"]["loader"]
    mask_loader["params"]["mask_path"] = path.join(
        info["mask"]["directory"], info["mask"]["file"])
    mask_loader["params"]["crop"] = info["crop"]
    snapshot_path = "/efs/fMRI_AE/{}/model/{}".format(experiment_name,
                                                      snapshot_name)

    # Ask for confirmation before anything is logged or written.
    while True:
        answer = input("ok? (y/n):")
        if answer == 'y' or answer == 'Y':
            log_config(config, "training start")
            break
        elif answer == 'n' or answer == 'N':
            destroy_config(config)
            exit(1)

    try:
        # --- mask -------------------------------------------------------
        try:
            print("mask loading...")
            load_mask_module = import_module(mask_loader["module"],
                                             mask_loader["package"])
            load_mask = getattr(load_mask_module, mask_loader["function"])
            mask = load_mask(**mask_loader["params"])
            print("done.")
            print("mask.shape: {}".format(mask.shape))
        except FileNotFoundError as e:
            raise e

        # --- model ------------------------------------------------------
        model_module = import_module(config["model"]["module"],
                                     config["model"]["package"])
        Model = getattr(model_module, config["model"]["class"])
        model = Model(mask=mask, **config["model"]["params"])
        finetune_config = info["finetune"]
        if finetune_config is not None:
            load_npz(path.join(finetune_config["directory"],
                               finetune_config["file"]),
                     model, strict=False)

        # --- device -----------------------------------------------------
        try:
            chainer.cuda.get_device_from_id(0).use()
            gpu = 0
            print("transferring model to GPU...")
            model.to_gpu(gpu)
            print("GPU enabled")
        except RuntimeError:
            gpu = -1
            print("GPU disabled")

        # --- data -------------------------------------------------------
        dataset_module = import_module(config["dataset"]["module"],
                                       config["dataset"]["package"])
        Dataset = getattr(dataset_module, config["dataset"]["class"])
        train_dataset = Dataset(**config["dataset"]["train"]["params"])
        valid_dataset = Dataset(**config["dataset"]["valid"]["params"])
        train_iterator = Iterator(train_dataset, config["batch"]["train"],
                                  True, True)
        valid_iterator = Iterator(valid_dataset, config["batch"]["valid"],
                                  False, False)

        # --- optimizer --------------------------------------------------
        Optimizer = getattr(chainer.optimizers, config["optimizer"]["class"])
        optimizer = Optimizer(**config["optimizer"]["params"])
        optimizer.setup(model)
        for hook_config in config["optimizer"]["hook"]:
            hook_module = import_module(hook_config["module"],
                                        hook_config["package"])
            Hook = getattr(hook_module, hook_config["class"])
            optimizer.add_hook(Hook(**hook_config["params"]))

        # --- trainer and extensions -------------------------------------
        updater = Updater(train_iterator, optimizer, device=gpu)
        trainer = Trainer(updater, **config["trainer"]["params"])
        trainer.extend(snapshot(),
                       trigger=config["trainer"]["snapshot_interval"])
        trainer.extend(
            snapshot_object(model, "model_iter_{.updater.iteration}"),
            trigger=config["trainer"]["model_interval"])
        trainer.extend(observe_lr(),
                       trigger=config["trainer"]["log_interval"])
        trainer.extend(
            LogReport([
                "epoch", "iteration", "main/loss", "main/pca_loss",
                "main/reconstruction_loss", "validation/main/loss"
            ], trigger=config["trainer"]["log_interval"]))
        trainer.extend(Evaluator(valid_iterator, model, device=gpu),
                       trigger=config["trainer"]["eval_interval"])
        trainer.extend(PrintReport([
            "epoch", "iteration", "main/loss", "main/pca_loss",
            "main/reconstruction_loss", "validation/main/loss"
        ]), trigger=config["trainer"]["log_interval"])
        trainer.extend(ProgressBar(update_interval=1))

        # --- optional scheduled calls on the target model ---------------
        if "schedule" in info.keys():
            for idx, (interval, funcs) in enumerate(
                    info["schedule"].items()):
                # Bind ``funcs`` as a default argument so each closure
                # keeps its own function list (avoids late binding).
                callback = lambda trainer, funcs=funcs: [
                    trainer.updater.get_optimizer('main').target.
                    __getattribute__(func["function"])(*func["params"])
                    for func in funcs
                ]
                trainer.extend(callback,
                               name="schedule_{}".format(idx),
                               trigger=ManualScheduleTrigger(*interval))

        # --- resume snapshot and switch the training objective ----------
        load_npz(snapshot_path, trainer)
        target = trainer.updater.get_optimizer("main").target
        target.reset_pca()
        target.attach_pca()
        ipca_param = np.load(
            "/efs/fMRI_AE/Stacked_8_8_8_8_feature/ipca_mean_7920_components_990_7920.npz"
        )
        # Freeze the PCA head to precomputed iPCA components/mean.
        target.pca.W = chainer.Parameter(ipca_param["components"])
        target.pca.bias = chainer.Parameter(ipca_param["mean"])
        target.pca.disable_update()
        target.pca.to_gpu(gpu)
        target.detach_pca_loss()
        target.attach_reconstruction_loss()
        target.release_decoder()
        target.freeze_encoder()

        trainer.run()
        log_config(config, "succeeded")
    except Exception as e:
        log_config(config, "unintentional termination")
        raise e
def calculate_local_lipschitz(self):
    """Estimate per-sample margins scaled by local gradient norms.

    For each batch, samples 100 noisy copies of the input, backprops
    each pairwise class-score difference to the input, and keeps the
    maximum input-gradient norm per class pair (a local Lipschitz
    estimate). The clean outputs are then turned into margins via
    ``self.get_margin`` and the per-sample minimum margin is collected.

    Returns:
        list: Minimum margin per sample over all class pairs.
    """
    print('\rlocal Lipschitz start', flush=True)
    iterator = self.iterator
    preprocess = self.preprocess
    target = self.target
    eval_func = self.eval_func or (lambda x: target(preprocess(x)))
    # NOTE(review): ``or`` treats device id 0 as unset — presumably all
    # callers pass a positive device id or None; confirm.
    device = self.device or chainer.cuda.cupy.cuda.get_device_id()
    assert device >= 0
    if self.eval_hook:
        self.eval_hook(self)
    # Compute gradients and sample them around each input.
    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)
    self.global_grad = chainer.cuda.cupy.zeros(
        (self.n_class, self.n_class), dtype=chainer.cuda.cupy.float32)
    margin_list = []
    size = 0
    total = len(it.dataset)
    for batch in it:
        size += len(batch)
        sys.stdout.write('\r{0}/{1}'.format(size, total))
        sys.stdout.flush()
        x, t = self.converter(batch, device)
        xp = chainer.cuda.get_array_module(x)
        c = xp.ones((1, ), dtype=np.float32)
        local_grad = xp.zeros((self.n_class, self.n_class),
                              dtype=xp.float32)
        with chainer.force_backprop_mode():
            for _ in range(100):
                noise = xp.random.normal(size=x.shape).astype(xp.float32)
                normalize(noise)
                x2 = chainer.Parameter(x + noise)
                y, t, _ = eval_func((x2, t, c))
                for i in range(self.n_class):
                    # j starts at i + 1, so i == j can never occur here;
                    # the original dead ``if i == j: continue`` branch
                    # was removed.
                    for j in range(i + 1, self.n_class):
                        target.cleargrads()
                        x2.grad = None
                        F.sum(y[:, i] - y[:, j]).backward()
                        norm = xp.max(
                            xp.sqrt((x2.grad**2).sum(
                                axis=tuple(range(1, x2.ndim)))))
                        local_grad[i, j] = max(local_grad[i, j], norm)
        # Mirror the upper triangle so the matrix is symmetric.
        for i in range(self.n_class):
            for j in range(i + 1, self.n_class):
                local_grad[j, i] = local_grad[i, j]
        self.global_grad[:] = xp.maximum(self.global_grad, local_grad)
        # Margins from the clean (noise-free) forward pass.
        with chainer.no_backprop_mode():
            y, t, _ = eval_func((x, t, c))
        y = y.array
        grad = local_grad[t]
        margins = self.get_margin(
            y, y[list(range(t.size)), t].reshape(t.size, 1), grad)
        margins = xp.min(margins, axis=1)
        margin_list.extend(list(margins.get()))
    return margin_list
def __init__(self, initializer, size):
    """Register parameter ``b`` built from ``initializer`` with ``size``."""
    super(Const, self).__init__()
    with self.init_scope():
        self.b = chainer.Parameter(initializer, size)
def __init__(self):
    """Chain with a scalar parameter ``w`` initialized to 42."""
    super(SimpleChain, self).__init__()
    with self.init_scope():
        # Positional args: initializer, shape (scalar), name.
        self.w = chainer.Parameter(42, (), 'w')
def __init__(self, dim_in=512, scaling=1.0):
    """Build the residual convolutional shape decoder.

    Args:
        dim_in (int): Dimensionality of the input feature vector.
        scaling (float): Output scaling factor.
    """
    super(ConvolutionShapeDecoder, self).__init__()
    self.grid_size = 16
    self.obj_scale = 0.5
    self.scaling = scaling

    # Create base shape & faces and transforming matrices.
    self.vertices_base = None
    self.vertices_matrix = None
    self.num_vertices = None
    self.faces = None
    self.init_vertices_base()
    self.init_faces()
    self.laplacian = get_graph_laplacian(self.faces, self.num_vertices)
    self.normalize_vertices_base()

    # Initialize the NN layers: six parallel branches, each with three
    # up-sampling stages of residual-style conv blocks.
    with self.init_scope():
        widths = [512, 256, 128, 64, 3]
        weight_init = chainer.initializers.HeNormal()
        table = {}
        for part in range(6):
            table['linear_p%d_in' % part] = cl.Linear(
                dim_in, widths[0] * 4, initialW=weight_init, nobias=True)
            # Stage 1: up-sample to 4x4.
            table['conv_p%d_1_1_1' % part] = cl.Deconvolution2D(
                widths[0], widths[1], 3, 2, 1, outsize=(4, 4),
                initialW=weight_init, nobias=True)
            table['conv_p%d_1_1_2' % part] = cl.Convolution2D(
                widths[1], widths[1], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_1_1_3' % part] = cl.Deconvolution2D(
                widths[0], widths[1], 1, 2, 0, outsize=(4, 4),
                initialW=weight_init, nobias=True)
            table['conv_p%d_1_2_1' % part] = cl.Convolution2D(
                widths[1], widths[1], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_1_2_2' % part] = cl.Convolution2D(
                widths[1], widths[1], 3, 1, 1,
                initialW=weight_init, nobias=True)
            # Stage 2: up-sample to 8x8.
            table['conv_p%d_2_1_1' % part] = cl.Deconvolution2D(
                widths[1], widths[2], 3, 2, 1, outsize=(8, 8),
                initialW=weight_init, nobias=True)
            table['conv_p%d_2_1_2' % part] = cl.Convolution2D(
                widths[2], widths[2], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_2_1_3' % part] = cl.Deconvolution2D(
                widths[1], widths[2], 1, 2, 0, outsize=(8, 8),
                initialW=weight_init, nobias=True)
            table['conv_p%d_2_2_1' % part] = cl.Convolution2D(
                widths[2], widths[2], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_2_2_2' % part] = cl.Convolution2D(
                widths[2], widths[2], 3, 1, 1,
                initialW=weight_init, nobias=True)
            # Stage 3: up-sample to 16x16.
            table['conv_p%d_3_1_1' % part] = cl.Deconvolution2D(
                widths[2], widths[3], 3, 2, 1, outsize=(16, 16),
                initialW=weight_init, nobias=True)
            table['conv_p%d_3_1_2' % part] = cl.Convolution2D(
                widths[3], widths[3], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_3_1_3' % part] = cl.Deconvolution2D(
                widths[2], widths[3], 1, 2, 0, outsize=(16, 16),
                initialW=weight_init, nobias=True)
            table['conv_p%d_3_2_1' % part] = cl.Convolution2D(
                widths[3], widths[3], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['conv_p%d_3_2_2' % part] = cl.Convolution2D(
                widths[3], widths[3], 3, 1, 1,
                initialW=weight_init, nobias=True)
            table['linear_p%d_out' % part] = cl.Convolution2D(
                widths[3], widths[4], 1, 1, 0, initialW=weight_init)
            # BatchNormalization companions for the above layers.
            table['linear_p%d_in_bn' % part] = cl.BatchNormalization(widths[0])
            table['conv_p%d_1_1_2_bn' % part] = cl.BatchNormalization(widths[1])
            table['conv_p%d_1_2_1_bn' % part] = cl.BatchNormalization(widths[1])
            table['conv_p%d_1_2_2_bn' % part] = cl.BatchNormalization(widths[1])
            table['conv_p%d_2_1_1_bn' % part] = cl.BatchNormalization(widths[1])
            table['conv_p%d_2_1_2_bn' % part] = cl.BatchNormalization(widths[2])
            table['conv_p%d_2_2_1_bn' % part] = cl.BatchNormalization(widths[2])
            table['conv_p%d_2_2_2_bn' % part] = cl.BatchNormalization(widths[2])
            table['conv_p%d_3_1_1_bn' % part] = cl.BatchNormalization(widths[2])
            table['conv_p%d_3_1_2_bn' % part] = cl.BatchNormalization(widths[3])
            table['conv_p%d_3_2_1_bn' % part] = cl.BatchNormalization(widths[3])
            table['conv_p%d_3_2_2_bn' % part] = cl.BatchNormalization(widths[3])
            table['linear_p%d_out_bn' % part] = cl.BatchNormalization(widths[3])
        for attr_name, link in table.items():
            setattr(self, attr_name, link)
        # Promote the precomputed base vertices to a trainable parameter.
        self.vertices_base = chainer.Parameter(self.vertices_base)
def __init__(self, fn, params):
    """Wrap callable ``fn``; register each entry of ``params`` as a parameter.

    Args:
        fn: Callable applied by the model.
        params (dict): Mapping of parameter name to initial value.
    """
    super(AnyModel, self).__init__()
    with self.init_scope():
        for name, value in params.items():
            setattr(self, name, chainer.Parameter(value, name=name))
        self.fn = fn
def __init__(self, dim_in=512, scaling=1.0, symmetric=False):
    """Build the convolutional texture decoder.

    Args:
        dim_in (int): Dimensionality of the input feature vector.
        scaling (float): Output scaling factor.
        symmetric (bool): Enforce a symmetric texture when True.
    """
    super(ConvolutionTextureDecoder, self).__init__()
    self.grid_size = 16
    self.texture_size = 64
    self.scaling = scaling
    self.symmetric = symmetric
    self.vertices = None
    self.faces = None
    self.compute_vertices()
    with self.init_scope():
        out_channels = 3
        dims = [512, 256, 128, 64]
        weight_init = chainer.initializers.HeNormal()
        built = {}
        # Six parallel branches, each up-sampling 4x4 -> 64x64.
        for part in range(6):
            built['linear_p%d_1' % part] = cl.Linear(
                dim_in, dims[0] * 4 * 4, initialW=weight_init, nobias=True)
            built['conv_p%d_1' % part] = cl.Deconvolution2D(
                dims[0], dims[1], 5, 2, 2, outsize=(8, 8),
                initialW=weight_init, nobias=True)
            built['conv_p%d_2' % part] = cl.Deconvolution2D(
                dims[1], dims[2], 5, 2, 2, outsize=(16, 16),
                initialW=weight_init, nobias=True)
            built['conv_p%d_3' % part] = cl.Deconvolution2D(
                dims[2], dims[3], 5, 2, 2, outsize=(32, 32),
                initialW=weight_init, nobias=True)
            built['conv_p%d_4' % part] = cl.Deconvolution2D(
                dims[3], out_channels, 5, 2, 2, outsize=(64, 64),
                initialW=weight_init)
            built['linear_p%d_1_bn' % part] = cl.BatchNormalization(dims[0])
            built['conv_p%d_1_bn' % part] = cl.BatchNormalization(dims[1])
            built['conv_p%d_2_bn' % part] = cl.BatchNormalization(dims[2])
            built['conv_p%d_3_bn' % part] = cl.BatchNormalization(dims[3])
        for attr_name, link in built.items():
            setattr(self, attr_name, link)
        # One zero-initialized texture atlas of shape (3, H, 6H).
        self.texture_base = chainer.Parameter(
            chainer.initializers.Constant(0),
            (3, self.texture_size, 6 * self.texture_size))
def __init__(self, w, g):
    """Create a zero-initialized parameter, then set its data/grad to ``w``/``g``."""
    super(SimpleLink, self).__init__()
    with self.init_scope():
        self.param = chainer.Parameter(I.Zero(), w.shape)
        self.param.data = w
        self.param.grad = g
def train(X, Y, minibatch=1000, num_epoch=1000, patch_size=31,
          checkPath=None, savePath="./save/mix/feed/", device=1):
    """Train the mixed feed-forward model on (X, Y) with minibatch SGD.

    Args:
        X, Y: Input samples and integer labels (sliceable sequences).
        minibatch (int): Minibatch size.
        num_epoch (int): Number of passes over the data.
        patch_size (int): Input patch edge length; defines embedding dim.
        checkPath (str): Optional pickle of parameters to warm-start from.
        savePath (str): Save directory (unused in this body — TODO confirm).
        device (int): Device id for tensors and the device guard.
    """
    op = VariableOperation(device=device)
    loss = softmax_cross_entropy
    optimizer = Adam(alpha=0.0002)
    embedding_dim = patch_size // 2 + 1
    # NOTE(review): the model is built with device=0 regardless of the
    # ``device`` argument — looks intentional here, but verify.
    _model = model(design=design5, device=0, embedding_dim=embedding_dim)
    if (checkPath is not None):
        # Warm-start: copy saved arrays into the model's parameters.
        param_dict = load_obj(checkPath)
        with device_guard(device):
            for key, param in _model.namedparams():
                param.copydata(chainer.Parameter(cp.asarray(param_dict[key])))
    optimizer.setup(_model)
    with device_guard(device):
        for epoch in range(num_epoch):
            start_time = timer()
            num = 0
            count = 0
            correct = 0
            loss_sum = 0
            batch_starts = np.arange(0, len(X), minibatch)
            for start in batch_starts:
                end = start + minibatch if start + minibatch < len(X) else len(X)
                x, y = X[start:end], Y[start:end]
                x, y = tensor(x, device=device), tensor(y, device=device)
                x.cast('float32')
                x = _model(x)
                L = wrapper(loss, device, x, y)
                _model.cleargrads()
                L.backward()
                optimizer.update()
                # Accumulate running accuracy and loss for reporting.
                correct = (tensor(op.argmax(x, axis=1),
                                  device=device) == y).sum() + correct
                loss_sum = Variable(L, device=device) + loss_sum
                num = num + y.shape[0]
                count = count + 1
            print("epoch: ", epoch, "num ", num, "correct: ", correct / num,
                  "loss: ", loss_sum / count, "time: ", timer() - start_time)
def __init__(self):
    """Create a two-element mixing parameter ``alpha`` initialized to 0.5."""
    super(Addition, self).__init__()
    with self.init_scope():
        self.alpha = chainer.Parameter(
            np.ones((2, ), dtype=np.float32) * .5)
        # Tag the parameter for Parseval-style handling downstream.
        self.alpha.parseval_alpha = True
def create_simple_link():
    """Return a fresh Link with one zero-initialized parameter ``param``."""
    link = chainer.Link()
    with link.init_scope():
        link.param = chainer.Parameter(np.zeros(1))
    return link
def __init__(self):
    """Register ``initial_p`` (captured from the enclosing scope) as ``p``."""
    super(Link, self).__init__()
    with self.init_scope():
        self.p = chainer.Parameter(initial_p)
def __init__(self, num_combinations: int, num_operations: int):
    """Attention link with one learnable weight matrix.

    Args:
        num_combinations: Number of rows of the attention matrix.
        num_operations: Number of columns of the attention matrix.
    """
    super(Attention, self).__init__()
    # BUG FIX: the parameter was previously assigned outside of
    # ``init_scope()``, so it was never registered with the link and was
    # invisible to ``params()``, optimizers, and serialization.
    with self.init_scope():
        self.attention = chainer.Parameter(
            initializer=chainer.initializers.Normal(scale=1e-3),
            shape=(num_combinations, num_operations)
        )
def test_assign_param_outside_of_init_scope(self):
    """Parameters assigned outside ``init_scope`` must not be registered."""
    param = chainer.Parameter()
    self.link.p = param
    self.assertTrue(
        all(param is not registered for registered in self.link.params()))
def __init__(self, initial_log_temperature=0):
    """Register a scalar ``log_temperature`` parameter.

    Args:
        initial_log_temperature: Initial value (stored as float32).
    """
    super().__init__()
    with self.init_scope():
        self.log_temperature = chainer.Parameter(
            np.array(initial_log_temperature, dtype=np.float32))
def __init__(self):
    """Link with a (3, 5) parameter drawn uniformly from [-1, 1)."""
    super(SimpleLink, self).__init__()
    with self.init_scope():
        initial_weights = numpy.random.uniform(
            -1, 1, (3, 5)).astype(numpy.float32)
        self.w = chainer.Parameter(initial_weights)
def __init__(self, vocab_size, ParameterClass):
    """Character/radical CNN + BiLSTM classifier.

    Args:
        vocab_size (int): Size of the embedding vocabulary.
        ParameterClass: Hyper-parameter holder exposing ``radical_len``,
            ``character_len`` and ``embed_dim``.
    """
    self.a = ParameterClass
    self.vocab_size = vocab_size
    # Apply several filter shapes (radical-level and character-level)
    # to capture features at different granularities.
    self.cnn_window_sizes = [1, 2, 3, 3, 6, 9]
    self.cnn_stride_sizes = [1, 1, 1, 3, 3, 3]
    # Number of filters scales with the window/stride ratio.
    self.cnn_filter_nums = [
        int(50 * (w / r))
        for w, r in zip(self.cnn_window_sizes, self.cnn_stride_sizes)]
    # Pooling windows depend on the conv window and stride (one per word).
    self.pooling_window_sizes = [
        int((self.a.radical_len * self.a.character_len - w) / r + 1)
        for w, r in zip(self.cnn_window_sizes, self.cnn_stride_sizes)]
    initializer = chainer.initializers.HeNormal()
    super(Model, self).__init__()
    with self.init_scope():
        # Embedding
        self.embed = L.EmbedID(
            self.vocab_size, self.a.embed_dim, initialW=initializer)
        # Convolutions (six window/stride variants). ``setattr`` keeps
        # the original attribute names conv0 .. conv5.
        for idx in range(6):
            setattr(self, 'conv%d' % idx, L.Convolution2D(
                1, self.cnn_filter_nums[idx],
                ksize=(self.cnn_window_sizes[idx], self.a.embed_dim),
                stride=(self.cnn_stride_sizes[idx], self.a.embed_dim)))
        self.cnn_output_dim = sum(self.cnn_filter_nums)
        # BatchNormalization before pooling (bnorm0 .. bnorm5).
        for idx in range(6):
            setattr(self, 'bnorm%d' % idx,
                    L.BatchNormalization(self.cnn_filter_nums[idx]))
        # Highway 1
        self.hw1 = L.Highway(
            self.cnn_output_dim, activate=F.tanh,
            init_Wh=initializer, init_Wt=initializer)
        # BiLSTM
        self.bi_lstm_dim = self.cnn_output_dim * 2
        self.bi_lstm = L.NStepBiLSTM(
            n_layers=1, in_size=self.cnn_output_dim,
            out_size=self.cnn_output_dim, dropout=0.0)
        # Highway 2 + soft attention
        self.hw2 = L.Highway(
            self.bi_lstm_dim, activate=F.tanh,
            init_Wh=initializer, init_Wt=initializer)
        self.u_a = chainer.Parameter(initializer, (1, self.bi_lstm_dim))
        # Output projection (+ BatchNormalization)
        self.fc = L.Linear(self.bi_lstm_dim, 2, initialW=initializer)
        self.bnorm_last = L.BatchNormalization(2)
def _initialize_params(self, in_size):
    """After base initialization, seed ``gamma`` with W's largest singular value."""
    super(SNLinear, self)._initialize_params(in_size)
    if self.use_gamma:
        # The leading singular value of W initializes the (1, 1) gamma.
        _, singular_values, _ = np.linalg.svd(self.W.data)
        with self.init_scope():
            self.gamma = chainer.Parameter(singular_values[0], (1, 1))
def train(model_fun, loader, device=0, num_epoch=100, patch_size=61,
          checkPath=None, pertubation=False, savePath=None):
    """Train a model from ``model_fun`` on batches yielded by ``loader``.

    Every 10 epochs the regularization regime alternates between L1 on
    pooling layers (linear layers frozen) and no regularization (pooling
    layers frozen). Parameters are saved per epoch when ``savePath`` is
    given.

    Args:
        model_fun: Factory returning the model given device/embedding dim.
        loader: Batch provider with ``get``/``periodic_get`` methods.
        device (int): Device id.
        num_epoch (int): Number of epochs.
        patch_size (int): Input patch edge length; defines embedding dim.
        checkPath (str): Optional pickle of parameters to warm-start from.
        pertubation (bool): Use the perturbed loader when True.
        savePath (str): Directory to save per-epoch parameter pickles.
    """
    op = operation(device=device)
    loss = chainer.functions.softmax_cross_entropy
    optimizer = chainer.optimizers.Adam(alpha=0.0002)
    embedding_dim = patch_size // 2 + 1
    _model = model_fun(device=device, embedding_dim=embedding_dim)
    if (checkPath is not None):
        # Warm-start: copy saved arrays into the model's parameters.
        param_dict = load_obj(checkPath)
        with device_guard(device):
            for key, param in _model.namedparams():
                param.copydata(chainer.Parameter(cp.asarray(param_dict[key])))
    optimizer.setup(_model)
    with device_guard(device):
        for epoch in range(num_epoch):
            if (epoch % 10 == 0):
                # Alternate the regularization regime every 10 epochs.
                check = epoch / 10
                if (check % 2 == 0):
                    print("L1, and only for pooling")
                    for key, link in _model.namedlinks():
                        if bool(re.search("linear", key)):
                            link.disable_update()
                        else:
                            link.enable_update()
                    optimizer.add_hook(Lasso(rate=0.001), name='lasso')
                else:
                    print("No regularization, and only for linear")
                    for key, link in _model.namedlinks():
                        if bool(re.search("pool", key)):
                            link.disable_update()
                        else:
                            link.enable_update()
                    optimizer.remove_hook('lasso')
            start_time = timer()
            num = 0
            count = 0
            correct = 0
            loss_sum = 0
            if (pertubation == False):
                ld_fun = loader.get(epoch, pertube=False)
            else:
                ld_fun = loader.periodic_get(epoch)
            for x, y, index in ld_fun:
                x, y = op.array(x), op.array(y)
                x.cast(op.run.float32)
                x = _model(x)
                L = wrapper(loss, device, x, y)
                _model.cleargrads()
                L.backward()
                optimizer.update()
                # Accumulate running accuracy and loss for reporting.
                correct = (op.argmax(x, axis=1) == y).sum() + correct
                loss_sum = Variable(L, device=device) + loss_sum
                num = num + y.shape[0]
                count = count + 1
            print("epoch: ", epoch, "num ", num, "correct: ", correct / num,
                  "loss: ", loss_sum / count, "time: ", timer() - start_time)
            # Snapshot parameters to host memory and optionally to disk.
            param_dict = {}
            for key, params in _model.namedparams():
                param_dict[key] = params.array.get()
            if (savePath is not None):
                mkdir(savePath)
                save_obj(param_dict, savePath + "/{}.pkl".format(epoch))