def __init__(self, x, y, l, window, opt, lr, init_emb, dim_emb, dim_hidden, n_vocab, L2_reg, unit,
                 sim='cos', n_layers=1, activation=tanh):
        self.tr_inputs = [x, y, l]
        self.pr_inputs = [x, y, l]

        self.x = x  # 1D: batch_size * l * 2, 2D: window; elem=word_id
        self.y = y  # 1D: batch_size; elem=label
        self.l = l  # scalar: elem=sentence length

        batch_size = y.shape[0]
        n_cands = x.shape[0] // batch_size // l  # integer division: candidates per example

        self.pad = build_shared_zeros((1, dim_emb))
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab - 1, dim_emb))
        else:
            self.emb = theano.shared(init_emb)
        self.E = T.concatenate([self.pad, self.emb], 0)
        self.W_out = theano.shared(sample_weights(dim_hidden, dim_hidden))
        self.params = [self.emb, self.W_out]

        """ Input Layer """
        e = self.E[x]  # e: 1D: batch_size * l * 2, 2D: window, 3D: dim_emb
        x_in = e.reshape((batch_size * n_cands, l, -1))

        """ Intermediate Layer """
        # h: 1D: batch_size * n_cands, 2D: dim_hidden
        h, params = cnn.layers(x_in, window, dim_emb, dim_hidden, n_layers, activation)
        self.params.extend(params)

        """ Output Layer """
        h = h.reshape((batch_size, n_cands, -1))
        h_1 = h[T.arange(batch_size), 0]
        h_2 = h[T.arange(batch_size), 1:]
        if sim == 'cos':
            y_score = cosign_similarity(h_1, h_2)  # helper name as spelled in the source (cosine similarity)
        else:
            y_score = T.batched_dot(T.dot(h_1, self.W_out), h_2.dimshuffle(0, 2, 1))
        y_score_hat = T.max(y_score, 1)

        """ Objective Function """
        self.nll = max_margin_loss(y_score_hat, y_score[T.arange(batch_size), y])
        self.L2_sqr = regularization(self.params)
        self.cost = self.nll + L2_reg * self.L2_sqr / 2.

        """ Optimization """
        if opt == 'adagrad':
            self.update = ada_grad(cost=self.cost, params=self.params, lr=lr)
        elif opt == 'ada_delta':
            self.update = ada_delta(cost=self.cost, params=self.params)
        elif opt == 'adam':
            self.update = adam(cost=self.cost, params=self.params, lr=lr)
        else:
            self.update = sgd(cost=self.cost, params=self.params, lr=lr)

        """ Predicts """
        y_hat = T.argmax(y_score, 1)

        """ Check Accuracies """
        self.correct = T.eq(y_hat, y)
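
A note before the remaining examples: every snippet on this page calls a `sample_weights` helper defined elsewhere in its project. In the Theano snippets it is a random weight initializer returning an `(n_rows, n_cols)` array. A minimal sketch, assuming a scaled-uniform (Glorot-style) draw, which is one common choice; the actual projects may scale differently:

import numpy as np
import theano

dtype = theano.config.floatX  # the `dtype` alias the snippets below rely on

def sample_weights(n_rows, n_cols):
    # hypothetical initializer: uniform in +/- sqrt(6 / (fan_in + fan_out))
    bound = np.sqrt(6.0 / (n_rows + n_cols))
    return np.asarray(np.random.uniform(-bound, bound, (n_rows, n_cols)),
                      dtype=dtype)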
Example #2
    def __init__(self, n_in, n_hidden, activation_fn=T.tanh):
        self.W_ih = theano.shared(sample_weights(n_in, n_hidden))
        self.W_hh = theano.shared(sample_weights(n_hidden, n_hidden))
        self.b_h = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))

        self.params = [self.W_ih,
                        self.W_hh,
                        self.b_h,
                        self.h0]

        self.activation_fn = activation_fn
Example #3
    def train_dataloader(self):

        transf = self.default_transforms() if self.train_transforms is None else self.train_transforms

        dataset = self.DATASET(self.data_dir, train=True, download=False,
                               transform=transf, **self.extra_args)
        train_length = len(dataset)
        dataset_train, _ = random_split(
            dataset,
            [train_length - self.val_split, self.val_split],
            generator=torch.Generator().manual_seed(self.seed)
        )

        s_weights = utils.sample_weights(np.asarray(
            dataset_train.dataset.targets)[dataset_train.indices])

        if self.accelerator in ('ddp', 'ddp2'):
            sampler = None  # DistributedSampler(dataset_train)
            shuffle = False
        else:
            sampler = WeightedRandomSampler(s_weights,
                                            num_samples=len(s_weights), replacement=True)
            shuffle = False

        loader = DataLoader(
            dataset_train,
            batch_size=self.batch_size,
            shuffle=shuffle,
            sampler=sampler,
            num_workers=self.num_workers,
            drop_last=True,
            pin_memory=True
        )
        return loader
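
Note that `utils.sample_weights` in this PyTorch example plays a different role from the Theano initializer of the same name: it must return one weight per training sample for `WeightedRandomSampler`. A minimal sketch, assuming inverse-class-frequency weighting (a common convention; the real helper is not shown):

import numpy as np

def sample_weights(targets):
    # hypothetical: weight each sample by the inverse frequency of its class,
    # so the sampler draws classes roughly uniformly
    _, inverse, counts = np.unique(targets, return_inverse=True,
                                   return_counts=True)
    return (1.0 / counts)[inverse]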
Example #4
    def __init__(self, n_in, n_hidden, n_batch=1, init_state_params=True, activation_fn=T.tanh):
        self.W = theano.shared(sample_weights(n_in, 3*n_hidden))
        self.T = theano.shared(sample_weights(n_hidden, 3*n_hidden))
        self.b = theano.shared(np.zeros(3*n_hidden, dtype=dtype))

        self.params = [self.W,
                        self.T,
                        self.b]

        self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
        self.c0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
        if init_state_params:
            self.params += [self.h0, self.c0]

        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_batch = n_batch
        self.activation_fn = activation_fn
Example #5
def layers(x, window, dim_emb, dim_hidden, n_layers, activation=tanh):
    params = []
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)

    def zero_pad_gate(matrix):
        return T.neq(T.sum(T.eq(matrix, zero), 2, keepdims=True), dim_emb * window)

    for i in range(n_layers):
        if i == 0:
            W = theano.shared(sample_weights(dim_emb * window, dim_hidden))
            h = T.max(zero_pad_gate(x) * relu(T.dot(x, W)), 1)
#            h = T.max(T.dot(x, W), 1)
        else:
            W = theano.shared(sample_weights(dim_hidden, dim_hidden))
            h = activation(T.dot(h, W))
        params.append(W)

    return h, params
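
A note on `zero_pad_gate`: a window position is padding exactly when all `dim_emb * window` of its features equal zero, so `T.sum(T.eq(matrix, zero), 2)` hits `dim_emb * window` there and the `T.neq` gate becomes 0; real positions get 1. The same predicate in NumPy, for illustration:

import numpy as np

x = np.array([[[0.0, 0.0, 0.0],      # all-zero features: padding -> gate 0
               [0.2, -0.1, 0.5]]])   # real position             -> gate 1
gate = (np.sum(x == 0.0, axis=2, keepdims=True) != x.shape[2]).astype(x.dtype)
print(gate.ravel())  # [0. 1.]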
Example #7
    def __init__(self, n_filters, stack_size, n_row, n_col, 
                    stride=(1,1), border_mode='valid', activation_fn=T.tanh):
        W_init = sample_weights(stack_size*n_row*n_col, n_filters).T
        W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
        self.W_ih = theano.shared(W_init.astype(dtype))

        W_init = sample_weights(n_filters*n_row*n_col, n_filters).T
        W_init = W_init.reshape(n_filters, n_filters, n_row, n_col)
        self.W_hh = theano.shared(W_init.astype(dtype))

        self.b_h = theano.shared(np.zeros(n_filters, dtype=dtype))

        self.params = [self.W_ih,
                        self.W_hh,
                        self.b_h]

        self.border_mode = border_mode
        self.stride = stride
        self.activation_fn = activation_fn
Example #8
    def __init__(self, n_filters, stack_size, n_row, n_col, 
                    stride=(1,1), border_mode='valid'):
        W_init = sample_weights(stack_size*n_row*n_col, n_filters).T
        W_init = W_init.reshape(n_filters, stack_size, n_row, n_col)
        self.W = theano.shared(W_init.astype(dtype))
        self.b = theano.shared(np.zeros(n_filters, dtype=dtype))
        self.params = [self.W, self.b]

        self.border_mode = border_mode
        self.stride = stride
Example #9
    def __init__(self, n_in, n_hidden, n_batch=1, init_state_params=True, activation_fn=T.tanh):
        self.W = theano.shared(sample_weights(n_in, n_hidden))
        self.U = theano.shared(sample_weights(n_hidden, n_hidden))
        self.b = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.Wz = theano.shared(sample_weights(n_in, n_hidden))
        self.Uz = theano.shared(sample_weights(n_hidden, n_hidden))
        self.bz = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.Wr = theano.shared(sample_weights(n_in, n_hidden))
        self.Ur = theano.shared(sample_weights(n_hidden, n_hidden))
        self.br = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.params = [self.W,
                        self.U,
                        self.b,
                        self.Wz,
                        self.Uz,
                        self.bz,
                        self.Wr,
                        self.Ur,
                        self.br]

        self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
        if init_state_params:
            self.params += [self.h0]

        self.n_batch = n_batch
        self.activation_fn = activation_fn
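
The parameters above are those of a GRU: `Wz/Uz/bz` for the update gate, `Wr/Ur/br` for the reset gate, and `W/U/b` for the candidate state. The step function is not part of the snippet; a sketch of one common convention, assuming these attribute names:

import theano.tensor as T

def gru_step(self, x_t, h_tm1):
    # sketch only: standard GRU recurrence
    z = T.nnet.sigmoid(T.dot(x_t, self.Wz) + T.dot(h_tm1, self.Uz) + self.bz)
    r = T.nnet.sigmoid(T.dot(x_t, self.Wr) + T.dot(h_tm1, self.Ur) + self.br)
    h_tilde = self.activation_fn(T.dot(x_t, self.W)
                                 + T.dot(r * h_tm1, self.U) + self.b)
    return z * h_tm1 + (1. - z) * h_tilde  # interpolate old and candidate state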
Example #11
    def __init__(self, n_in, n_hidden, n_batch=None, activation_fn=T.tanh):
        self.W = theano.shared(sample_weights(n_in, 4*n_hidden))
        self.T = theano.shared(sample_weights(n_hidden, 4*n_hidden))
        self.b = theano.shared(np.zeros(4*n_hidden, dtype=dtype))

        self.params = [self.W,
                        self.T,
                        self.b]

        if n_batch is None:
            self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
            self.c0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
            self.params += [self.h0, self.c0]
            # NOTE: `v` (the time-major input tensor) is not defined in this
            # snippet; it must already be in scope when the graph is built.
            self.h_init = [T.tile(self.h0, (v.shape[1], 1)),
                           T.tile(self.c0, (v.shape[1], 1))]
        else:
            self.h0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
            self.c0 = theano.shared(np.zeros((n_batch, n_hidden), dtype=dtype))
            self.h_init = [self.h0, self.c0]

        self.n_in = n_in
        self.n_hidden = n_hidden
        self.activation_fn = activation_fn
Example #13
    def __init__(self,
                 w,
                 d,
                 n_layers,
                 vocab_size,
                 n_in=32,
                 n_h=32,
                 n_words=1000,
                 batch_size=32,
                 activation=tanh):

        self.w = w
        self.d = d

        """model parameters"""
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.n_in = n_in
        self.n_h = n_h
        self.n_y = vocab_size
        self.n_words = n_words
        self.batch_size = batch_size
        self.activation = activation

        """embeddings"""
        self.emb = theano.shared(sample_weights(self.vocab_size, self.n_in))

        """initial parameters"""
        self.x = self.emb[self.w]  # x: 1D: n_words * batch_size, 2D: n_in

        self.c0 = theano.shared(np.zeros((self.batch_size, n_h), dtype=theano.config.floatX))
        self.h0 = self.activation(self.c0)

        """layers and parameters"""
        # note: rebinds self.layers from the builder method to the built layer list
        self.layers, self.params, self.layer_output = self.layers(n_layers=n_layers)

        self.y = self.layer_output[-1]  # y: 1D: n_words, 2D: batch_size, 3D: vocab_size
        if n_layers % 2 == 0:
            self.y = self.y[::-1]

        self.p_y_given_x = self.y.dimshuffle((1, 0, 2)).reshape((n_words * batch_size, vocab_size))
        self.nll = -T.mean(T.log(self.p_y_given_x)[T.arange(d.shape[0]), d])
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.errors = T.neq(self.y_pred, d)
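
The `nll` line above uses advanced indexing to pick each example's gold-word probability out of `p_y_given_x`. A small NumPy illustration of that indexing trick:

import numpy as np

p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])               # (n_examples, vocab_size)
d = np.array([0, 1])                          # gold word ids
picked = np.log(p)[np.arange(d.shape[0]), d]  # log p[0, 0] and log p[1, 1]
nll = -np.mean(picked)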
Example #14
    def __init__(self, n_i=32, n_h=32, activation=tanh):

        self.activation = activation
        """input gate parameters"""
        self.W_xi = theano.shared(sample_weights(n_i, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        """forget gate parameters"""
        self.W_xf = theano.shared(sample_weights(n_i, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        """cell parameters"""
        self.W_xc = theano.shared(sample_weights(n_i, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))
        """output gate parameters"""
        self.W_xo = theano.shared(sample_weights(n_i, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))

        self.params = [
            self.W_xi, self.W_hi, self.W_xf, self.W_hf, self.W_xc, self.W_hc,
            self.W_xo, self.W_ho
        ]
Example #16
    def __init__(self, n_in, n_hidden, activation_fn=T.tanh):
        n_i = n_c = n_o = n_f = n_hidden

        self.W_xi = theano.shared(sample_weights(n_in, n_i))
        self.W_hi = theano.shared(sample_weights(n_hidden, n_i))
        self.W_ci = theano.shared(sample_weights(n_c, n_i))
        self.b_i = theano.shared(np.zeros(n_i, dtype=dtype))

        self.W_xf = theano.shared(sample_weights(n_in, n_f))
        self.W_hf = theano.shared(sample_weights(n_hidden, n_f))
        self.W_cf = theano.shared(sample_weights(n_c, n_f))
        self.b_f = theano.shared(np.zeros(n_f, dtype=dtype))
        
        self.W_xc = theano.shared(sample_weights(n_in, n_c))
        self.W_hc = theano.shared(sample_weights(n_hidden, n_c))
        self.b_c = theano.shared(np.zeros(n_c, dtype=dtype))
        
        self.W_xo = theano.shared(sample_weights(n_in, n_o))
        self.W_ho = theano.shared(sample_weights(n_hidden, n_o))
        self.W_co = theano.shared(sample_weights(n_c, n_o))
        self.b_o = theano.shared(np.zeros(n_o, dtype=dtype))

        self.c0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))
        self.h0 = theano.shared(np.zeros((1, n_hidden), dtype=dtype))

        self.params = [self.W_xi,
                        self.W_hi,
                        self.W_ci,
                        self.b_i,
                        self.W_xf,
                        self.W_hf,
                        self.W_cf,
                        self.b_f,
                        self.W_xc,
                        self.W_hc,
                        self.b_c,
                        self.W_xo,
                        self.W_ho,
                        self.W_co,
                        self.b_o,
                        self.c0,
                        self.h0]

        self.activation_fn = activation_fn
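
These are the parameters of a peephole LSTM (the `W_c*` matrices connect the cell state to each gate). The recurrence itself is not shown; a sketch under the standard peephole equations, assuming the attribute names above:

import theano.tensor as T

def lstm_step(self, x_t, h_tm1, c_tm1):
    # sketch only: standard peephole-LSTM recurrence
    i = T.nnet.sigmoid(T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi)
                       + T.dot(c_tm1, self.W_ci) + self.b_i)
    f = T.nnet.sigmoid(T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf)
                       + T.dot(c_tm1, self.W_cf) + self.b_f)
    c = f * c_tm1 + i * self.activation_fn(T.dot(x_t, self.W_xc)
                                           + T.dot(h_tm1, self.W_hc) + self.b_c)
    o = T.nnet.sigmoid(T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho)
                       + T.dot(c, self.W_co) + self.b_o)
    return o * self.activation_fn(c), c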
Example #17
	def __init__(self, model_settings):
		#
		self.dim_model = model_settings['dim_model']  # 100
		self.dim_world = model_settings['dim_world']  # 78
		# it is the dim of raw world input
		# raw world input is NOT one-hot vector
		self.dim_lang = model_settings['dim_lang']  # 524
		self.dim_action = model_settings['dim_action']  # 4
		# drop_out related stuff
		self.drop_out_rate = model_settings['drop_out_rate']  # 0.9
		assert (
			self.drop_out_rate <= numpy.float32(1.0)
		)
		self.rnd_gen = RandomStreams(seed=12345)
		self.drop_out_layer = self.rnd_gen.uniform(
				(self.dim_model,)) < self.drop_out_rate  # boolean mask of shape [dim_model]
		# Sampling the mask yields e.g. [True, True, False, True, ...]:
		# each entry is True (kept) with probability drop_out_rate.
		self.drop_out_layer_gen = theano.function(
				[], self.drop_out_layer
		)
		#
		#
		print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
		#
		""" identity matrix of shape [524*524] """
		self.Emb_lang_sparse = theano.shared(
				numpy.identity(self.dim_lang, dtype=dtype),
				name='Emb_lang_sparse'
		)
		# print "self.Emb_lang_sparse=",self.Emb_lang_sparse.shape.eval()
		# this is the I-matrix that stands for idx of tokens
		#
		""" Matrix of shape [524*100] """
		self.Emb_enc_forward = theano.shared(
				utils.sample_weights(self.dim_lang, self.dim_model),
				name='Emb_enc_forward'
		)
		# print "self.Emb_enc_forward=",self.Emb_enc_forward.eval()," ", self.Emb_enc_forward.shape.eval()

		""" Matrix of shape [200*400] """
		self.W_enc_forward = theano.shared(
				utils.sample_weights(
						2 * self.dim_model, 4 * self.dim_model
				), name='W_enc_forward'
		)
		# print "self.W_enc_forward=",self.W_enc_forward.eval()," ", self.W_enc_forward.shape.eval()

		""" Matrix of shape [400] """
		self.b_enc_forward = theano.shared(
				numpy.zeros((4 * self.dim_model,), dtype=dtype),
				name='b_enc_forward'
		)
		# print "self.b_enc_forward=",self.b_enc_forward.eval()," ", self.b_enc_forward.shape.eval()
		#
		""" Matrix of shape [524*100] """
		self.Emb_enc_backward = theano.shared(
				utils.sample_weights(self.dim_lang, self.dim_model),
				name='Emb_enc_backward'
		)

		""" Matrix of shape [200*400] """
		self.W_enc_backward = theano.shared(
				utils.sample_weights(
						2 * self.dim_model, 4 * self.dim_model
				), name='W_enc_backward'
		)

		""" Matrix of shape [400] """
		self.b_enc_backward = theano.shared(
				numpy.zeros((4 * self.dim_model,), dtype=dtype),
				name='b_enc_backward'
		)
		#
		""" Matrix of shape [724*100] """
		self.W_att_scope = theano.shared(
				utils.sample_weights(
						self.dim_lang + 2 * self.dim_model, self.dim_model
				), name='W_att_scope'
		)

		""" Matrix of shape [100*100] """
		self.W_att_target = theano.shared(
				utils.sample_weights(
						self.dim_model, self.dim_model
				), name='W_att_target'
		)
		# print "self.W_att_target=",self.W_att_target.eval()," ", self.W_att_target.shape.eval()

		""" Matrix of shape [100] """
		self.b_att = theano.shared(
				numpy.zeros((self.dim_model,), dtype=dtype),
				name='b_att'
		)
		#
		""" Matrix of shape [78*100] """
		self.Emb_dec = theano.shared(
				utils.sample_weights(self.dim_world, self.dim_model),
				name='Emb_dec'
		)

		""" Matrix of shape [924*400] """
		self.W_dec = theano.shared(
				utils.sample_weights(
						self.dim_lang + 4 * self.dim_model, 4 * self.dim_model
				), name='W_dec'
		)
		# print "self.W_dec=",self.W_dec.eval()," ", self.W_dec.shape.eval()

		""" Matrix of shape [400] """
		self.b_dec = theano.shared(
				numpy.zeros((4 * self.dim_model,), dtype=dtype),
				name='b_dec'
		)
		#

		""" Matrix of shape [824*100] """
		self.W_out_hz = theano.shared(
				utils.sample_weights(
						self.dim_lang + 3 * self.dim_model, self.dim_model
				), name='W_out_hz'
		)

		""" Matrix of shape [100*4] """
		self.W_out = theano.shared(
				utils.sample_weights(
						self.dim_model, self.dim_action
				), name='W_out'
		)
		#

		""" Matrix of shape [100] """
		self.c0 = theano.shared(
				numpy.zeros((self.dim_model,), dtype=dtype),
				name='c0'
		)

		""" Matrix of shape [100] """
		self.h0 = theano.shared(
				numpy.zeros((self.dim_model,), dtype=dtype),
				name='h0'
		)
		#
		self.params = [
			self.Emb_enc_forward,
			self.W_enc_forward, self.b_enc_forward,
			self.Emb_enc_backward,
			self.W_enc_backward, self.b_enc_backward,
			#
			self.W_att_scope, self.W_att_target, self.b_att,
			self.Emb_dec,
			self.W_dec, self.b_dec,
			self.W_out_hz, self.W_out
		]
		#
		self.cost = None
		self.grad_params = None
		self.log_prob = None
Example #18

    check_point = cb.ModelCheckpoint(weight_file,
                                     save_best_only=True,
                                     monitor=args.metrics,
                                     verbose=1)
    early_stop = cb.EarlyStopping(patience=10,
                                  restore_best_weights=True,
                                  monitor=args.metrics,
                                  verbose=1)
    training = model.fit(train_sample,
                         train_labels,
                         validation_data=(valid_sample, valid_labels),
                         callbacks=[check_point, early_stop],
                         epochs=args.n_epochs,
                         verbose=args.verbose,
                         class_weight=None if args.n_classes == 2 else
                         class_weights(train_labels),
                         sample_weight=sample_weights(train_sample,
                                                      train_labels,
                                                      args.n_classes,
                                                      args.weight_type,
                                                      args.output_dir),
                         batch_size=max(1, n_gpus) * int(args.batch_size))
    model.load_weights(weight_file)
else:
    train_labels = []
    training = None

# RESULTS AND PLOTTING SECTION
if args.cross_valid == 'ON':
    valid_probs = cross_validation(valid_sample, valid_labels, scalars, model,
                                   args.output_dir, args.n_folds)
    print('MERGING ALL FOLDS AND PREDICTING CLASSES ...')
if args.cross_valid == 'OFF':
    print('\nValidation sample', args.n_valid, 'class predictions:')
Example #19
    def __init__(self, n_in):
        self.gamma = theano.shared(np.squeeze(sample_weights(1, n_in)))
        self.beta = theano.shared(np.zeros(n_in, dtype=dtype))

        self.params = [self.gamma, self.beta]
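
`gamma` (randomly initialized scale) and `beta` (zero shift) look like the learnable parameters of a normalization layer. The forward pass they imply would be, assuming standard batch normalization over the batch axis (the snippet itself does not show it):

import numpy as np

def batch_norm_forward(x, gamma, beta, eps=1e-5):
    # hypothetical forward pass: normalize per feature, then scale and shift
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    return gamma * (x - mean) / np.sqrt(var + eps) + beta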
Example #20
    def __init__(self, n_in, n_output, selection_threshold=1.):
        self.W = theano.shared(sample_weights(n_in, n_output))
        self.params = [self.W]

        self.selection_threshold = selection_threshold
Example #21
    def __init__(self, n_i=32, n_h=45):

        self.W = theano.shared(sample_weights(n_i, n_h))
        self.params = [self.W]
Example #22
    func_args = (data_file, total_var, args.n_train, args.n_tracks, args.n_classes, args.train_cuts)
    train_sample, train_labels = make_sample(*func_args); sample_composition(train_sample)
    if args.resampling == 'ON': train_sample, train_labels = balance_sample(train_sample, train_labels)
    if args.scaling and args.model_in != '':
        train_sample = load_scaler(train_sample, scalars, args.output_dir+'/'+args.scaler_in)
    if args.scaling and args.model_in == '':
        scaler_out = args.output_dir+'/'+args.scaler_out
        train_sample, valid_sample = apply_scaler(train_sample, valid_sample, scalars, scaler_out)
    compo_matrix(valid_labels, train_labels=train_labels); print()
    model_out   = args.output_dir+'/'+args.model_out
    check_point = cb.ModelCheckpoint(model_out, save_best_only      =True, monitor=args.metrics, verbose=1)
    early_stop  = cb.EarlyStopping(patience=10, restore_best_weights=True, monitor=args.metrics, verbose=1)
    training = model.fit( train_sample, train_labels, validation_data=(valid_sample,valid_labels),
                          callbacks=[check_point,early_stop], epochs=args.n_epochs, verbose=args.verbose,
                          class_weight=class_weights(train_labels),
                          sample_weight=sample_weights(train_sample, train_labels, args.n_classes,
                          args.weight_type, args.output_dir), batch_size=max(1,n_gpus)*int(args.batch_size) )
    model.load_weights(model_out)
else: train_labels = []; training = None


# RESULTS AND PLOTTING SECTION
if args.cross_valid == 'ON':
    valid_probs = cross_validation(valid_sample, valid_labels, scalars, model, args.output_dir, args.n_folds)
    print('MERGING ALL FOLDS AND PREDICTING CLASSES ...')
if args.cross_valid == 'OFF':
    print('\nValidation sample', args.n_valid, 'class predictions:')
    valid_probs = model.predict(valid_sample, batch_size=20000, verbose=args.verbose); print()
valid_results(valid_sample, valid_labels, valid_probs, train_labels, training, args.output_dir, args.plotting)
if args.results_out != '':
    print('Saving validation results to:', args.output_dir+'/'+args.results_out, '\n')
    valid_sample = {key:valid_sample[key] for key in other_var}
Example #23
def federated_learning(communication_rounds=1, epochs_per_round=1, saving=False,
                       sampling_idx_layers=None, sampling_idx_all=None):
    client_list, sampling_types, samples_data_loaders = get_sample_data_loaders()
    client_names = np.array(['Client-{}'.format(i) for i in range(4)])
    train_loaders = []
    test_loaders = []
    for client_name in client_names:
        train_loader, test_loader = get_cifar_data_loader(client_name, batch_size=batch_size)
        train_loaders.append(train_loader)
        test_loaders.append(test_loader)

    # Initiate Parameters
    server = Server(start_round=0, checkpoint_path=os.path.join(CHECKPOINTS_DIR, 'Server'), device=device)

    n_paras = sum(p.numel() for p in server.model.parameters())
    print('Total n_paras: {}'.format(n_paras))

    layer_names = server.model.layer_names
    n_layers = len(layer_names)
    info_file = os.path.join(HISTORY_DIR, 'model_info')
    np.savez_compressed(info_file, layer_names=layer_names)

    federated_clients = []

    for client_name, train_loader, test_loader in zip(client_names, train_loaders, test_loaders):
        federated_clients.append(Client(client_name=client_name,
                                        checkpoint_path=os.path.join(CHECKPOINTS_DIR, client_name),
                                        train_loader=train_loader, test_loader=test_loader, device=device))

    weights0_layers, weights0_all = sample_weights(server.model, sampling_idx_layers, sampling_idx_all)
    weights0_file = os.path.join(WEIGHTS_DIR, 'weights_0')
    np.savez_compressed(weights0_file, layers=weights0_layers, all=weights0_all)
    torch.save(server.model.state_dict(), os.path.join(CHECKPOINTS_DIR, 'model_0.cp'))

    cosines = {}
    for client_name in client_list:
        cosines[client_name] = [[] for _ in range(n_layers + 1)]

    total_accuracy_list = [[] for _ in client_list]

    last_time = time()
    # Start federated learning
    for i in range(communication_rounds):
        print('Communication Round {} | Time: {}'.format(i, time() - last_time))
        last_time = time()

        pre_model = CIFARModel().cuda()
        pre_model.load_state_dict(server.model.state_dict())

        global_parameters = server.get_parameters()
        local_parameters = []
        # Federated Learning
        for client in federated_clients:
            client.set_parameters(global_parameters)
            client.run(n_epochs=epochs_per_round, save_last=True)
            local_parameters.append(client.get_parameters())

        server.aggregate(local_parameters)
        server.save(suffix='_r{}'.format(i))

        if saving:
            server_weights_layers, server_weights_all = sample_weights(server.model,
                                                                       sampling_idx_layers, sampling_idx_all)
            weights_file = os.path.join(WEIGHTS_DIR, 'Server_r{}'.format(i))
            np.savez_compressed(weights_file, layers=server_weights_layers, all=server_weights_all)

            for client_id, client_name in enumerate(client_list):
                client = federated_clients[np.where(client_names == client_name)[0][0]]
                model = server.model
                model.eval()
                results = {}

                for sampling_type in sampling_types:
                    print('Predicting {} {}'.format(client_name, sampling_type))
                    data_loader = samples_data_loaders[sampling_type][client_id]
                    predictions = []
                    total, correct = 0, 0

                    with torch.no_grad():
                        for inputs, labels in data_loader:
                            inputs = inputs.float().to(device)
                            labels = labels.long().to(device)
                            outputs = model(inputs)
                            _, predicted = torch.max(outputs.data, 1)
                            predictions.append(predicted.cpu().numpy())
                            if sampling_type == 'local':
                                total += inputs.size(0)
                                correct += (predicted == labels).sum().item()
                    results[sampling_type] = np.concatenate(predictions)
                    if sampling_type == 'local':
                        print('    Total Acc:', correct / total)
                        total_accuracy_list[client_id].append(correct / total)

                output_file = os.path.join(OUTPUTS_DIR, '{}_Server_r{}'.format(client_name, i))
                np.savez_compressed(output_file, **results)

                client_weights_layers, client_weights_all = sample_weights(client.model,
                                                                           sampling_idx_layers, sampling_idx_all)

                weights_file = os.path.join(WEIGHTS_DIR, '{}_r{}'.format(client_name, i))
                np.savez_compressed(weights_file, layers=client_weights_layers, all=client_weights_all)

                update_cosines(pre_model, client.model, server.model, cosines[client_name])

        # Test
        for client in federated_clients:
            if client.name in client_list:
                client.set_parameters(server.get_parameters())
                client.test()

    loss_list = [client.history['loss'] for client in federated_clients if client.name in client_list]
    val_acc_list = [client.history['val_acc'] for client in federated_clients if client.name in client_list]
    np.savez_compressed(VAL_FILE, client_names=client_names, loss=loss_list, val_acc=val_acc_list,
                        tot_acc=total_accuracy_list)

    cosines_file = os.path.join(WEIGHTS_DIR, 'cosines')
    np.savez_compressed(cosines_file, **cosines)
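
`Server.aggregate` is not shown; a minimal sketch of equal-weight federated averaging, which is what the surrounding loop (collect local parameters, aggregate, redistribute) suggests:

import torch

def fedavg(client_state_dicts):
    # hypothetical equal-weight FedAvg: average each parameter across clients
    avg = {k: torch.zeros_like(v, dtype=torch.float32)
           for k, v in client_state_dicts[0].items()}
    for sd in client_state_dicts:
        for k, v in sd.items():
            avg[k] += v.float() / len(client_state_dicts)
    return avg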
Example #24
 def __init__(self, settings):
     print "initializing Sel Gen model ... "
     self.size_batch = settings['size_batch']
     self.num_sel = numpy.float32(settings['num_sel'])
     self.coef = numpy.float32(settings['coef'])
     #
      if settings['path_pre_train'] is None:
         self.dim_model = settings['dim_model']
         self.dim_lang = settings['dim_lang']
         self.dim_info = settings['dim_info']
         # initialize variables
         self.Emb_enc_forward = theano.shared(utils.sample_weights(
             self.dim_info, self.dim_model),
                                              name='Emb_enc_forward')
         self.Emb_enc_backward = theano.shared(utils.sample_weights(
             self.dim_info, self.dim_model),
                                               name='Emb_enc_backward')
         self.W_enc_forward = theano.shared(utils.sample_weights(
             2 * self.dim_model, 4 * self.dim_model),
                                            name='W_enc_forward')
         self.W_enc_backward = theano.shared(utils.sample_weights(
             2 * self.dim_model, 4 * self.dim_model),
                                             name='W_enc_backward')
         self.b_enc_forward = theano.shared(numpy.zeros(
             (4 * self.dim_model, ), dtype=dtype),
                                            name='b_enc_forward')
         self.b_enc_backward = theano.shared(numpy.zeros(
             (4 * self.dim_model, ), dtype=dtype),
                                             name='b_enc_backward')
         #
         self.W_pre_att = theano.shared(utils.sample_weights(
             self.dim_info + 2 * self.dim_model, self.dim_model),
                                        name='W_pre_att')
         self.b_pre_att = theano.shared(numpy.zeros((self.dim_model, ),
                                                    dtype=dtype),
                                        name='b_pre_att')
         #
         self.W_att = theano.shared(utils.sample_weights(
             self.dim_model, self.dim_model),
                                    name='W_att')
         self.U_att = theano.shared(utils.sample_weights(
             self.dim_info + 2 * self.dim_model, self.dim_model),
                                    name='U_att')
         self.b_att = theano.shared(numpy.zeros((self.dim_model, ),
                                                dtype=dtype),
                                    name='b_att')
         #
         self.Emb_dec = theano.shared(utils.sample_weights(
             self.dim_lang, self.dim_model),
                                      name='Emb_dec')
         self.W_dec = theano.shared(utils.sample_weights(
             self.dim_info + 4 * self.dim_model, 4 * self.dim_model),
                                    name='W_dec')
         self.b_dec = theano.shared(numpy.zeros((4 * self.dim_model, ),
                                                dtype=dtype),
                                    name='b_dec')
         self.L_0 = theano.shared(utils.sample_weights(
             self.dim_model, self.dim_lang),
                                  name='L_0')
         self.L = theano.shared(utils.sample_weights(
             self.dim_info + 3 * self.dim_model, self.dim_model),
                                name='L')
         #
     else:
         #
         path_pre_train = os.path.abspath(settings['path_pre_train'])
         with open(path_pre_train, 'rb') as f:
             model_pre_train = pickle.load(f)
         #
         self.Emb_enc_forward = theano.shared(
             model_pre_train['Emb_enc_forward'])
         self.Emb_enc_backward = theano.shared(
             model_pre_train['Emb_enc_backward'])
         self.W_enc_forward = theano.shared(
             model_pre_train['W_enc_forward'])
         self.W_enc_backward = theano.shared(
             model_pre_train['W_enc_backward'])
         self.b_enc_forward = theano.shared(
             model_pre_train['b_enc_forward'])
         self.b_enc_backward = theano.shared(
             model_pre_train['b_enc_backward'])
         #
         self.W_pre_att = theano.shared(model_pre_train['W_pre_att'])
         self.b_pre_att = theano.shared(model_pre_train['b_pre_att'])
         #
         self.W_att = theano.shared(model_pre_train['W_att'])
         self.U_att = theano.shared(model_pre_train['U_att'])
         self.b_att = theano.shared(model_pre_train['b_att'])
         #
         self.Emb_dec = theano.shared(model_pre_train['Emb_dec'])
         self.W_dec = theano.shared(model_pre_train['W_dec'])
         self.b_dec = theano.shared(model_pre_train['b_dec'])
         self.L_0 = theano.shared(model_pre_train['L_0'])
         self.L = theano.shared(model_pre_train['L'])
         #
         self.dim_model = self.Emb_enc_forward.shape[1]
         self.dim_lang = self.Emb_dec.shape[0]
         self.dim_info = self.Emb_enc_forward.shape[0]
         #
     #
     self.h_0_mat = tensor.zeros((self.size_batch, self.dim_model),
                                 dtype=dtype)
     self.c_0_mat = tensor.zeros((self.size_batch, self.dim_model),
                                 dtype=dtype)
     #
     self.params = [
         self.Emb_enc_forward, self.Emb_enc_backward, self.W_enc_forward,
         self.W_enc_backward, self.b_enc_forward, self.b_enc_backward,
         self.W_pre_att, self.b_pre_att, self.W_att, self.U_att, self.b_att,
         self.Emb_dec, self.W_dec, self.b_dec, self.L_0, self.L
     ]
     self.grad_params = None
     self.cost = None
Example #26
 def __init__(self, model_settings):
     #
     self.dim_model = model_settings['dim_model']
     self.dim_world = model_settings['dim_world']
     # it is the dim of raw world input
     # raw world input is NOT one-hot vector
     self.dim_lang = model_settings['dim_lang']
     self.dim_action = model_settings['dim_action']
     #
     print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
     #
     self.Emb_lang_sparse = theano.shared(
         numpy.identity(self.dim_lang, dtype=dtype),
         name='Emb_lang_sparse'
     )
     # this is the I-matrix that stands for idx of tokens
     #
     self.Emb_enc_forward = theano.shared(
         utils.sample_weights(self.dim_lang, self.dim_model),
         name='Emb_enc_forward'
     )
     self.W_enc_forward = theano.shared(
         utils.sample_weights(
             2*self.dim_model, 4*self.dim_model
         ), name='W_enc_forward'
     )
     self.b_enc_forward = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_enc_forward'
     )
     #
     self.Emb_enc_backward = theano.shared(
         utils.sample_weights(self.dim_lang, self.dim_model),
         name='Emb_enc_backward'
     )
     self.W_enc_backward = theano.shared(
         utils.sample_weights(
             2*self.dim_model, 4*self.dim_model
         ), name='W_enc_backward'
     )
     self.b_enc_backward = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_enc_backward'
     )
     #
     self.W_att_scope = theano.shared(
         utils.sample_weights(
             self.dim_lang+2*self.dim_model, self.dim_model
         ), name='W_att_scope'
     )
     self.W_att_target = theano.shared(
         utils.sample_weights(
             self.dim_model, self.dim_model
         ), name='W_att_target'
     )
     self.b_att = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='b_att'
     )
     #
     self.Emb_dec = theano.shared(
         utils.sample_weights(self.dim_world, self.dim_model),
         name='Emb_dec'
     )
     self.W_dec = theano.shared(
         utils.sample_weights(
             self.dim_lang+4*self.dim_model, 4*self.dim_model
         ), name='W_dec'
     )
     self.b_dec = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_dec'
     )
     #
     self.W_out_hz = theano.shared(
         utils.sample_weights(
             self.dim_lang+3*self.dim_model, self.dim_model
         ), name='W_out_hz'
     )
     self.W_out = theano.shared(
         utils.sample_weights(
             self.dim_model, self.dim_action
         ), name='W_out'
     )
     #
     self.c0 = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='c0'
     )
     self.h0 = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='h0'
     )
     #
     self.params = [
         self.Emb_enc_forward,
         self.W_enc_forward, self.b_enc_forward,
         self.Emb_enc_backward,
         self.W_enc_backward, self.b_enc_backward,
         #
         self.W_att_scope, self.W_att_target, self.b_att,
         self.Emb_dec,
         self.W_dec, self.b_dec,
         self.W_out_hz, self.W_out
     ]
     #
     self.cost = None
     self.grad_params = None
Example #28
 def __init__(self, model_settings):
     #
     self.dim_model = model_settings['dim_model']
     self.dim_world = model_settings['dim_world']
     # it is the dim of raw world input
     # raw world input is NOT one-hot vector
     self.dim_lang = model_settings['dim_lang']
     self.dim_action = model_settings['dim_action']
     #
     # drop_out related stuff
     self.drop_out_rate = model_settings['drop_out_rate']
     assert(
         self.drop_out_rate <= numpy.float32(1.0)
     )
     self.rnd_gen = RandomStreams(seed=12345)
     self.drop_out_layer = self.rnd_gen.uniform((self.dim_model,)) < self.drop_out_rate
     self.drop_out_layer_gen = theano.function(
         [], self.drop_out_layer
     )
     #
     #
     print "dim of model, world, lang and action is : ", self.dim_model, self.dim_world, self.dim_lang, self.dim_action
     #
     self.Emb_lang_sparse = theano.shared(
         numpy.identity(self.dim_lang, dtype=dtype),
         name='Emb_lang_sparse'
     )
     # this is the I-matrix that stands for idx of tokens
     #
     self.Emb_enc_forward = theano.shared(
         utils.sample_weights(self.dim_lang, self.dim_model),
         name='Emb_enc_forward'
     )
     self.W_enc_forward = theano.shared(
         utils.sample_weights(
             2*self.dim_model, 4*self.dim_model
         ), name='W_enc_forward'
     )
     self.b_enc_forward = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_enc_forward'
     )
     #
     self.Emb_enc_backward = theano.shared(
         utils.sample_weights(self.dim_lang, self.dim_model),
         name='Emb_enc_backward'
     )
     self.W_enc_backward = theano.shared(
         utils.sample_weights(
             2*self.dim_model, 4*self.dim_model
         ), name='W_enc_backward'
     )
     self.b_enc_backward = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_enc_backward'
     )
     #
     self.W_att_scope = theano.shared(
         utils.sample_weights(
             self.dim_lang+2*self.dim_model, self.dim_model
         ), name='W_att_scope'
     )
     self.W_att_target = theano.shared(
         utils.sample_weights(
             self.dim_model, self.dim_model
         ), name='W_att_target'
     )
     self.b_att = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='b_att'
     )
     #
     self.Emb_dec = theano.shared(
         utils.sample_weights(self.dim_world, self.dim_model),
         name='Emb_dec'
     )
     self.W_dec = theano.shared(
         utils.sample_weights(
             self.dim_lang+4*self.dim_model, 4*self.dim_model
         ), name='W_dec'
     )
     self.b_dec = theano.shared(
         numpy.zeros((4*self.dim_model, ), dtype=dtype),
         name='b_dec'
     )
     #
     self.W_out_hz = theano.shared(
         utils.sample_weights(
             self.dim_lang+3*self.dim_model, self.dim_model
         ), name='W_out_hz'
     )
     self.W_out = theano.shared(
         utils.sample_weights(
             self.dim_model, self.dim_action
         ), name='W_out'
     )
     #
     self.c0 = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='c0'
     )
     self.h0 = theano.shared(
         numpy.zeros((self.dim_model, ), dtype=dtype),
         name='h0'
     )
     #
     self.params = [
         self.Emb_enc_forward,
         self.W_enc_forward, self.b_enc_forward,
         self.Emb_enc_backward,
         self.W_enc_backward, self.b_enc_backward,
         #
         self.W_att_scope, self.W_att_target, self.b_att,
         self.Emb_dec,
         self.W_dec, self.b_dec,
         self.W_out_hz, self.W_out
     ]
     #
     self.cost = None
     self.grad_params = None
Example #29
 def __init__(self, n_in, n_output):
     self.W = theano.shared(sample_weights(n_in, n_output))
     self.b = theano.shared(np.zeros(n_output, dtype=dtype))
     self.params = [self.W, self.b]
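
For reference, the forward pass this last fully connected layer implies, assuming the usual affine map (not part of the snippet):

import theano.tensor as T

def forward(self, x):
    # (n_examples, n_in) dot (n_in, n_output) + (n_output,)
    return T.dot(x, self.W) + self.b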