def __init__(self, args, k=4, perc_validation_data=0.5):
        """Load and shuffle the link data and precompute the sizes of the validation and test parts.

        Nothing is returned. The constructor stores the shuffled datasets in 'self.D' and 'self.D_C',
        their sizes in 'self.len_D' and 'self.len_D_C', and the number of validation / test links per
        subsample in 'self.len_D_valid', 'self.len_D_test', 'self.len_D_C_valid' and 'self.len_D_C_test'.

        :type args: Argument Parser
        :param args: A class containing the parameters which were given to the program as command line
        arguments. 'args.linkdata_pos' and 'args.linkdata_neg' are passed to the 'load' helper.

        :type k: Int
        :param k: The number of disjoint data link samples. One subsample holds 'len / k' links.

        :type perc_validation_data: float
        :param perc_validation_data: The fraction of one subsample that is counted as validation data.
        The remaining links of the subsample are counted as test data.
        """

        self.k = k

        # Read the positive links first, then the negative links.
        positive_links = list(load(args.linkdata_pos))
        negative_links = list(load(args.linkdata_neg))
        self.len_D = len(positive_links)
        self.len_D_C = len(negative_links)

        # Shuffle with a fixed seed; the positive links consume the random state first.
        np.random.seed(1234)
        self.D = np.random.permutation(positive_links)
        self.D_C = np.random.permutation(negative_links)

        # Size (possibly fractional) of one of the k subsamples of each dataset.
        num_subsamples = float(k)
        subsample_size_D = self.len_D / num_subsamples
        subsample_size_D_C = self.len_D_C / num_subsamples

        # The validation part is truncated to an integer first; the test part is
        # whatever remains of the subsample, truncated as well.
        self.len_D_valid = int(subsample_size_D * perc_validation_data)
        self.len_D_test = int(subsample_size_D - self.len_D_valid)
        self.len_D_C_valid = int(subsample_size_D_C * perc_validation_data)
        self.len_D_C_test = int(subsample_size_D_C - self.len_D_C_valid)
示例#2
0
    def load(self, file):
        """Restore the model state that was stored in 'file'.

        The stored object is indexed as follows:
        [0] the tuple (flic_dim, gen_num_layers, gen_num_units, enc_num_layers, enc_num_units),
        [1] a flat sequence with the parameters of all layers, in the order of 'self.layers',
        [2] the optimizer parameters (objects offering 'get_value()'),
        [3] the validation error kept by the adaptive learning rate,
        [4] the best validation error.

        :type file: presumably a path or file object accepted by the 'load' helper -- TODO confirm
        :param file: The location of the stored model.

        :raises ValueError: If the stored layer configuration differs from the command line arguments.
        """
        print("Loading trained model from file.")
        # Inside the method body 'load' refers to the module-level helper, not to this method.
        params = load(file)

        # Test if the parameters have the right format
        flic_dim, gen_num_layers, gen_num_units, enc_num_layers, enc_num_units = params[0]
        # NOTE(review): 'flic_dim != flic_dim' compares the stored value with itself, so it never
        # rejects an ordinary number. Presumably it should be compared with the dimension the model
        # was configured with -- confirm which attribute holds that value.
        if (flic_dim != flic_dim
                or gen_num_layers != self.args.gen_num_hidden_layers
                or gen_num_units != self.args.gen_num_hidden_units
                or enc_num_layers != self.args.enc_num_hidden_layers
                or enc_num_units != self.args.enc_num_hidden_units):
            raise ValueError("The dimension of the loaded model are not consistent "
                             "with the dimensions of the command line arguments.")

        # Hand every layer its own consecutive slice of the flat parameter sequence.
        start = 0
        for layer in self.layers:
            end = start + len(layer.params)
            layer.params = params[1][start:end]
            start = end

        # Copy the stored optimizer state into the existing optimizer variables.
        params_opt = params[2]
        for model_param, stored_param in zip(self.params_opt, params_opt):
            model_param.set_value(stored_param.get_value())

        self.adaptive_learning_rate.val_error = params[3]
        self.best_val_error = params[4]
    def load(self, file):
        """Restore the generator, the encoder and the optimizer state that were stored in 'file'.

        The stored object is indexed as follows:
        [0] the tuple (sent_emb_dim, flic_dim, gen_num_layers, gen_num_units, enc_num_layers,
            enc_num_units),
        [1] the generator parameters, [2] the encoder parameters,
        [3] the optimizer parameters of the generator, [4] the optimizer parameters of the encoder
            (all of them objects offering 'get_value()'),
        [5] the validation error kept by the adaptive learning rate,
        [6] the best validation error.

        :type file: presumably a path or file object accepted by the 'load' helper -- TODO confirm
        :param file: The location of the stored model.

        :raises ValueError: If the stored model configuration differs from the current model.
        """
        print("Loading trained model from file.")
        # Inside the method body 'load' refers to the module-level helper, not to this method.
        params = load(file)

        # Test if the parameters have the right format
        sent_emb_dim, flic_dim, gen_num_layers, gen_num_units, enc_num_layers, enc_num_units = params[0]
        # NOTE(review): 'flic_dim != flic_dim' compares the stored value with itself, so it never
        # rejects an ordinary number. Presumably it should be compared with the dimension the model
        # was configured with -- confirm which attribute holds that value.
        if (sent_emb_dim != self.sent_emb_dim
                or flic_dim != flic_dim
                or gen_num_layers != self.args.gen_num_hidden_layers
                or gen_num_units != self.args.gen_num_hidden_units
                or enc_num_layers != self.args.enc_num_hidden_layers
                or enc_num_units != self.args.enc_num_hidden_units):
            raise ValueError("The dimension of the loaded model are not consistent "
                             "with the dimensions of the command line arguments.")

        # Rebuild the shared variables with the dtype configured in 'theano.config.floatX'.
        params_gen = [theano.shared(np.float64(x.get_value()).astype(theano.config.floatX)) for x in params[1]]
        params_enc = [theano.shared(np.float64(x.get_value()).astype(theano.config.floatX)) for x in params[2]]
        self.generator.set_params(params_gen)
        self.encoder.set_params(params_enc)

        # Copy the stored optimizer state into the existing optimizer variables.
        params_opt_g = params[3]
        params_opt_e = params[4]
        for model_param, stored_param in zip(self.params_opt_g, params_opt_g):
            model_param.set_value(stored_param.get_value())

        for model_param, stored_param in zip(self.params_opt_e, params_opt_e):
            model_param.set_value(stored_param.get_value())

        self.adaptive_learning_rate.val_error = params[5]
        self.best_val_error = params[6]
def load_link_data(args, perc_validation_data=0.2, perc_test_data=None):
    """Loads the link data from the paths given by the command line parameters of the program. The link data
    is shuffled with a fixed seed and split into training, validation and (optionally) test data.

    The following Data gets loaded: D (from 'args.linkdata_pos') and D_C (from 'args.linkdata_neg').

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to the program as command line arguments.

    :type perc_validation_data: float
    :param perc_validation_data: The fraction of the link data which should be in the validation set.

    :type perc_test_data: float or None
    :param perc_test_data: The fraction of the link data which should be in the test set.
                           If None, no test set is split off.

    :type return value: A sextuple of slices of the shuffled data, as produced by 'np.random.permutation'.
                        If 'perc_test_data' == None, then only a quadruple is returned.

    :return return value: The datasets D, D_C, D_valid, D_C_valid, D_test, D_C_test.
                          If 'perc_test_data' == None, then only D, D_C, D_valid, D_C_valid is returned.
                          The validation part is taken from the front of the shuffled data, the test part follows
                          it, and the training part is everything that remains.

    """

    # Load datasets from memory
    D = list(load(args.linkdata_pos))
    D_C = list(load(args.linkdata_neg))
    len_D = len(D)
    len_DC = len(D_C)

    # Permute datasets (fixed seed; D consumes the random state before D_C)
    np.random.seed(1234)
    D = np.random.permutation(D)
    D_C = np.random.permutation(D_C)

    # The validation set occupies the front of the shuffled data.
    end_D_valid = int(len_D * perc_validation_data)
    end_DC_valid = int(len_DC * perc_validation_data)

    # Without a test fraction only training and validation data are returned.
    if perc_test_data is None:
        return D[end_D_valid:], D_C[end_DC_valid:], \
               D[:end_D_valid], D_C[:end_DC_valid]

    # The test set directly follows the validation set; the rest is training data.
    end_D_test = int(end_D_valid + len_D * perc_test_data)
    end_DC_test = int(end_DC_valid + len_DC * perc_test_data)
    return D[end_D_test:], D_C[end_DC_test:], \
           D[:end_D_valid], D_C[:end_DC_valid], \
           D[end_D_valid:end_D_test], D_C[end_DC_valid:end_DC_test]
def load_reviews(args):
    """Reads the reviews data set from the location stored in 'args.reviews'.

    :type args: Argument Parser
    :param args: A class containing the parameters which were given to the program as command line arguments.

    :return return value: Whatever the 'load' helper produces for 'args.reviews'.

    """
    return load(args.reviews)
    def __init__(self, args, k=4, perc_validation_data=0.5):
        """
        Load the link data and partition the products of the positive links into 'k' disjoint sets.

        The visible part of the constructor returns nothing. It stores the loaded links in 'self.D' and
        'self.D_C', and the 'k' product sets in 'self.prods'.

        :type args: Argument Parser
        :param args: A class containing the parameters which were given to the program as command line arguments.

        :type k: Int
        :param k: The number of disjoint product sets (one per cross-validation run).
        :type perc_validation_data: float
        :param perc_validation_data: The percentage of the links from the training part of the subsample, which is used as the
        validation data. It is only stored here; it is not used in the visible part of the constructor.
        """

        self.k = k
        self.perc_validation_data = perc_validation_data

        # Load datasets from memory
        self.D = list(load(args.linkdata_pos))
        self.D_C = list(load(args.linkdata_neg))

        # Get list of products: the distinct first elements of the positive link pairs.
        # NOTE(review): the list is built from a set; for string IDs the iteration order of a set can
        # differ between interpreter runs when hash randomization is active, so the fixed seed below
        # may not be enough to make the split reproducible -- confirm.
        prods = {prod for prod, _ in self.D}
        prods = list(prods)

        # Permute product
        np.random.seed(1234)
        prods = np.random.permutation(prods)

        # Split products for the cross-validation runs
        # Every set gets 'num' products; the first 'rem' sets get one extra product, so that all
        # products are distributed and the set sizes differ by at most one.
        num, rem = divmod(len(prods), k)
        self.prods = [
            set(prods[i * num + min(i, rem):(i + 1) * num + min(i + 1, rem)])
            for i in range(k)
        ]