def __init__(self, args, k=4, perc_validation_data=0.5):
    """Prepare k disjoint link-data samples for cross-validation.

    Loads the positive (D) and negative (D_C) link datasets, shuffles
    them with a fixed seed, and precomputes the per-fold sizes of the
    validation and test portions.

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to
        the program as command line arguments.
    :type k: Int
    :param k: The number of disjoint data link samples.
    :type perc_validation_data: float
    :param perc_validation_data: The percentage of the links in the
        smaller subsample which is used as the validation data.  The
        remaining '1 - perc_validation_data' percent of the link data
        is used as the test data.
    """
    self.k = k

    # Load datasets from memory.
    positives = list(load(args.linkdata_pos))
    negatives = list(load(args.linkdata_neg))
    self.len_D = len(positives)
    self.len_D_C = len(negatives)

    # Permute datasets with a fixed seed so every run sees the same order.
    np.random.seed(1234)
    self.D = np.random.permutation(positives)
    self.D_C = np.random.permutation(negatives)

    # Per-fold sizes of the validation/test splits for D and D_C.
    fold_size_D = self.len_D / float(k)
    fold_size_D_C = self.len_D_C / float(k)
    self.len_D_valid = int(fold_size_D * perc_validation_data)
    self.len_D_test = int(fold_size_D - self.len_D_valid)
    self.len_D_C_valid = int(fold_size_D_C * perc_validation_data)
    self.len_D_C_test = int(fold_size_D_C - self.len_D_C_valid)
def load(self, file):
    """Load a trained model from 'file' and restore it into this object.

    The pickled object is expected to be a tuple:
      params[0]: the model dimensions recorded at save time,
      params[1]: the flat list of per-layer parameters,
      params[2]: the optimizer parameters,
      params[3]: the validation-error history of the adaptive learning rate,
      params[4]: the best validation error seen so far.

    :param file: The file the trained model was serialized to.
    :raises ValueError: If the stored dimensions are inconsistent with the
        current command line arguments.
    """
    print("Loading trained model from file.")
    params = load(file)

    # Test if the parameters have the right format.
    flic_dim, gen_num_layers, gen_num_units, enc_num_layers, enc_num_units = params[0]
    # NOTE(review): the original checked 'flic_dim != flic_dim', which is
    # always False; comparing against self.flic_dim is presumably what was
    # intended -- confirm the attribute name on this class.
    if flic_dim != self.flic_dim or \
            gen_num_layers != self.args.gen_num_hidden_layers or \
            gen_num_units != self.args.gen_num_hidden_units or \
            enc_num_layers != self.args.enc_num_hidden_layers or \
            enc_num_units != self.args.enc_num_hidden_units:
        # Bug fix: 'raise (ValueError, msg)' raises a tuple, which is a
        # TypeError in Python 3; construct and raise the exception properly.
        raise ValueError("The dimension of the loaded model are not consistent "
                         "with the dimensions of the command line arguments.")

    # Distribute the flat parameter list back onto the layers in order.
    start = 0
    for layer in self.layers:
        end = start + len(layer.params)
        layer.params = params[1][start:end]
        start = end

    # Restore the optimizer state.
    params_opt = params[2]
    for model_param, stored_param in zip(self.params_opt, params_opt):
        model_param.set_value(stored_param.get_value())

    # Restore training bookkeeping.
    self.adaptive_learning_rate.val_error = params[3]
    self.best_val_error = params[4]
def load(self, file):
    """Load a trained generator/encoder model from 'file' and restore it.

    The pickled object is expected to be a tuple:
      params[0]: the model dimensions recorded at save time,
      params[1]: the generator parameters,
      params[2]: the encoder parameters,
      params[3]: the generator optimizer parameters,
      params[4]: the encoder optimizer parameters,
      params[5]: the validation-error history of the adaptive learning rate,
      params[6]: the best validation error seen so far.

    :param file: The file the trained model was serialized to.
    :raises ValueError: If the stored dimensions are inconsistent with the
        current command line arguments.
    """
    print("Loading trained model from file.")
    params = load(file)

    # Test if the parameters have the right format.
    sent_emb_dim, flic_dim, gen_num_layers, gen_num_units, enc_num_layers, enc_num_units = params[0]
    # NOTE(review): the original checked 'flic_dim != flic_dim', which is
    # always False; comparing against self.flic_dim is presumably what was
    # intended -- confirm the attribute name on this class.
    if sent_emb_dim != self.sent_emb_dim or \
            flic_dim != self.flic_dim or \
            gen_num_layers != self.args.gen_num_hidden_layers or \
            gen_num_units != self.args.gen_num_hidden_units or \
            enc_num_layers != self.args.enc_num_hidden_layers or \
            enc_num_units != self.args.enc_num_hidden_units:
        # Bug fix: 'raise(ValueError, msg)' raises a tuple, which is a
        # TypeError in Python 3; construct and raise the exception properly.
        raise ValueError("The dimension of the loaded model are not consistent "
                         "with the dimensions of the command line arguments.")

    # Re-wrap the stored values as shared variables in the configured
    # float precision before handing them to the sub-models.
    params_gen = [theano.shared(np.float64(x.get_value()).astype(theano.config.floatX))
                  for x in params[1]]
    params_enc = [theano.shared(np.float64(x.get_value()).astype(theano.config.floatX))
                  for x in params[2]]
    self.generator.set_params(params_gen)
    self.encoder.set_params(params_enc)

    # Restore the optimizer state of generator and encoder.
    params_opt_g = params[3]
    params_opt_e = params[4]
    for model_param, stored_param in zip(self.params_opt_g, params_opt_g):
        model_param.set_value(stored_param.get_value())
    for model_param, stored_param in zip(self.params_opt_e, params_opt_e):
        model_param.set_value(stored_param.get_value())

    # Restore training bookkeeping.
    self.adaptive_learning_rate.val_error = params[5]
    self.best_val_error = params[6]
def load_link_data(args, perc_validation_data=0.2, perc_test_data=None):
    """Loads the link data from the paths given by the command line
    parameters of the program and splits it into training, validation and
    (optionally) test data.

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to the
        program as command line arguments.
    :type perc_validation_data: float
    :param perc_validation_data: The percentage of the link data which
        should be in the validation set.
    :type perc_test_data: float
    :param perc_test_data: The percentage of the link data which should be
        in the test set, or None for no test split.
    :type return value: A sextuple. Each item is a list of string-pairs.
        If 'perc_test_data' == None, then only a quadruple is returned.
    :return return value: The datasets D, D_C, D_valid, D_C_valid, D_test,
        D_C_test. If 'perc_test_data' == None, then only D, D_C, D_valid,
        D_C_valid is returned. Each of these datasets stores a list of
        pairs. The first string in each pair denotes the object ID of the
        reference product and the second string denotes the object ID of
        the target product.
    """
    # Load datasets from memory.
    D = list(load(args.linkdata_pos))
    D_C = list(load(args.linkdata_neg))
    len_D = len(D)
    len_DC = len(D_C)

    # Permute datasets with a fixed seed for reproducibility.
    np.random.seed(1234)
    D = np.random.permutation(D)
    D_C = np.random.permutation(D_C)

    # Split the data in training, validation and test set.
    end_D_valid = int(len_D * perc_validation_data)
    end_DC_valid = int(len_DC * perc_validation_data)

    # Bug fix: the docstring promises a quadruple for perc_test_data ==
    # None, but the original unconditionally computed
    # 'len_D * perc_test_data', which raises a TypeError for the default
    # argument value.
    if perc_test_data is None:
        return D[end_D_valid:], D_C[end_DC_valid:], \
            D[:end_D_valid], D_C[:end_DC_valid]

    end_D_test = int(end_D_valid + len_D * perc_test_data)
    end_DC_test = int(end_DC_valid + len_DC * perc_test_data)
    return D[end_D_test:], D_C[end_DC_test:], \
        D[:end_D_valid], D_C[:end_DC_valid], \
        D[end_D_valid:end_D_test], D_C[end_DC_valid:end_DC_test]
def load_reviews(args):
    """Load and return the reviews dataset from the configured path.

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to the
        program as command line arguments.
    :return: The deserialized reviews dataset.
    """
    return load(args.reviews)
def __init__(self, args, k=4, perc_validation_data=0.5):
    """Prepare a product-level k-fold split of the link data.

    Loads the positive (D) and negative (D_C) link datasets and
    partitions the set of reference products into k disjoint,
    nearly equal folds for cross-validation.

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to
        the program as command line arguments.
    :type k: Int
    :param k: The number of disjoint data link samples.
    :type perc_validation_data: float
    :param perc_validation_data: The percentage of the links from the
        training part of the subsample which is used as the validation
        data.
    """
    self.k = k
    self.perc_validation_data = perc_validation_data

    # Load datasets from memory.
    self.D = list(load(args.linkdata_pos))
    self.D_C = list(load(args.linkdata_neg))

    # Collect the distinct reference products of the positive links.
    prods = {prod for prod, _ in self.D}
    # Bug fix: iterating a set yields a hash-randomized order, so seeding
    # NumPy alone did not make the permutation reproducible across
    # interpreter runs.  Sorting first pins the input order, making the
    # seeded shuffle deterministic.
    prods = sorted(prods)

    # Permute products with a fixed seed.
    np.random.seed(1234)
    prods = np.random.permutation(prods)

    # Split the products into k disjoint folds for the cross-validation
    # runs; the first 'rem' folds receive one extra product each.
    num, rem = divmod(len(prods), k)
    self.prods = [
        set(prods[i * num + min(i, rem):(i + 1) * num + min(i + 1, rem)])
        for i in range(k)
    ]