def compose(self, data, arg_space):
    """
    Uses a composition model to compose elements.

    The argument rows are fetched and composed chunk by chunk, and the
    per-chunk results are stacked into one matrix, so that only a bounded
    number of rows is held in temporary matrices at any time.

    Args:
        data: data to be composed. List of tuples, each containing 3
        strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
        elements to be composed and composed_phrase is the string
        associated to their composition.

        arg_space: argument space(s). Space object or a tuple of two
        Space objects (e.g. my_space, or (my_space1, my_space2)).
        If two spaces are provided, arg1 elements of data are
        interpreted in space1, and arg2 in space2.

    Returns:
        composed space: a new object of type Space, containing the
        phrases obtained through composition.

    Raises:
        ValueError: if there are no data points to compose.
    """
    start = time.time()

    arg1_space, arg2_space = self.extract_arg_spaces(arg_space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (arg1_space.row2id, arg2_space.row2id, None))

    total = len(arg1_list)
    if total == 0:
        # Without at least one chunk there is no matrix to stack; fail with
        # a clear message instead of an undefined-variable error further down.
        raise ValueError("No data points to compose.")

    # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead
    # the /3.0 is needed because the composing data needs
    # 3 * len(train_data) memory (arg1 vector, arg2 vector, phrase vector)
    largest_dim = max(arg1_space.cooccurrence_matrix.shape[0],
                      arg2_space.cooccurrence_matrix.shape[0],
                      len(phrase_list))
    chunk_size = int(largest_dim * self.MAX_MEM_OVERHEAD / 3.0) + 1

    composed_mats = []
    for beg in range(0, total, chunk_size):
        end = min(beg + chunk_size, total)

        arg1_mat = arg1_space.get_rows(arg1_list[beg:end])
        arg2_mat = arg2_space.get_rows(arg2_list[beg:end])

        [arg1_mat, arg2_mat] = resolve_type_conflict([arg1_mat, arg2_mat],
                                                     DenseMatrix)
        composed_mats.append(self._compose(arg1_mat, arg2_mat))

    # Stack through the last chunk's matrix object, as before, but without
    # depending on a loop variable surviving the loop.
    composed_phrase_mat = composed_mats[-1].nary_vstack(composed_mats)

    if self.composed_id2column is None:
        self.composed_id2column = self._build_id2column(arg1_space,
                                                        arg2_space)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    # Report the number of composed data points over all chunks, not just
    # the row count of the last chunk.
    log.print_info(logger, 3, "Composed total data points:%s" % total)
    log.print_matrix_info(logger, composed_phrase_mat, 4,
                          "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
def compose(self, data, arg_space):
    """
    Uses a lexical function composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
        strings: (function_word, arg, composed_phrase). function_word and
        arg are the elements to be composed and composed_phrase is the
        string associated to their composition. function_word elements
        are looked up in self._function_space.

        arg_space: argument space, of type Space. arg elements of data
        are looked up in this space.

    Returns:
        composed space: a new object of type Space, containing the
        phrases obtained through composition.

    Raises:
        ValueError: if there are no data points to compose.
    """
    start = time.time()

    assert_is_instance(arg_space, Space)
    function_space = self._function_space
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (function_space.row2id, arg_space.row2id, None))

    if len(arg1_list) == 0:
        # Nothing to stack below; fail explicitly rather than through an
        # undefined loop variable.
        raise ValueError("No data points to compose.")

    # The element shape does not change while composing, so read it once.
    element_shape = function_space.element_shape

    composed_vec_list = []
    # NOTE(review): arg1_list and arg2_list come from the same
    # valid_data_to_lists call and are presumably the same length.
    for function_word, arg in zip(arg1_list, arg2_list):
        arg1_vec = function_space.get_row(function_word)
        arg2_vec = arg_space.get_row(arg)

        # Bring both operands to a common matrix type before composing.
        matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
        [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec],
                                                     matrix_type)

        composed_vec_list.append(
            self._compose(arg1_vec, arg2_vec, element_shape))

    # The composed elements drop the last dimension of the function shape.
    result_element_shape = element_shape[0:-1]
    composed_ph_mat = composed_vec_list[-1].nary_vstack(composed_vec_list)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3,
                   "Composed total data points:%s" % len(arg1_list))
    log.print_info(
        logger, 3,
        "Functional shape of the resulted (composed) elements:%s"
        % (result_element_shape, ))
    log.print_matrix_info(logger, composed_ph_mat, 4,
                          "Resulted (composed) semantic space:")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_ph_mat, phrase_list, self.composed_id2column,
                 element_shape=result_element_shape)
def compose(self, data, arg_space):
    """
    Uses a lexical function composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
        strings: (function_word, arg, composed_phrase). function_word and
        arg are the elements to be composed and composed_phrase is the
        string associated to their composition. function_word elements
        are looked up in self._function_space.

        arg_space: argument space, of type Space. arg elements of data
        are looked up in this space.

    Returns:
        composed space: a new object of type Space, containing the
        phrases obtained through composition.

    Raises:
        ValueError: if there are no data points to compose.
    """
    start = time.time()

    assert_is_instance(arg_space, Space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (self._function_space.row2id, arg_space.row2id, None))

    n_points = len(arg1_list)
    if n_points == 0:
        # The stacking step needs at least one composed vector; report the
        # problem directly instead of tripping over an unset loop variable.
        raise ValueError("No data points to compose.")

    function_shape = self._function_space.element_shape

    composed_vec_list = []
    # enumerate() replaces the Python 2-only xrange() index loop and runs
    # unchanged on both Python 2 and Python 3.
    for i, function_word in enumerate(arg1_list):
        arg1_vec = self._function_space.get_row(function_word)
        arg2_vec = arg_space.get_row(arg2_list[i])

        # Both operands must share one matrix type before they are composed.
        matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
        [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec],
                                                     matrix_type)

        composed_ph_vec = self._compose(arg1_vec, arg2_vec, function_shape)
        composed_vec_list.append(composed_ph_vec)

    # The composed elements drop the last dimension of the function shape.
    result_element_shape = function_shape[0:-1]
    composed_ph_mat = composed_vec_list[-1].nary_vstack(composed_vec_list)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3, "Composed total data points:%s" % n_points)
    log.print_info(logger, 3,
                   "Functional shape of the resulted (composed) elements:%s"
                   % (result_element_shape,))
    log.print_matrix_info(logger, composed_ph_mat, 4,
                          "Resulted (composed) semantic space:")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_ph_mat, phrase_list, self.composed_id2column,
                 element_shape=result_element_shape)
def get_neighbours(self, word, no_neighbours, similarity, space2=None):
    """
    Computes the neighbours of a word in the semantic space.

    Args:
        word: string, target word
        no_neighbours: int, the number of neighbours desired
        similarity: of type Similarity, the similarity measure to be used
        space2: Space type, Optional. If provided, the neighbours are
        retrieved from this space, rather than the current space.
        Default, neighbours are retrieved from the current space.

    Returns:
        list of (neighbour_string, similarity_value) tuples, at most
        no_neighbours long.

    Raises:
        KeyError: if the word is not found in the semantic space.
    """
    started_at = time.time()
    assert_is_instance(similarity, Similarity)

    query_vec = self.get_row(word)

    if space2 is not None:
        # Searching in another space: cast the query vector to that
        # space's matrix type when the two differ.
        target_type = type(space2.cooccurrence_matrix)
        if not isinstance(query_vec, target_type):
            query_vec = target_type(query_vec)
        sims = similarity.get_sims_to_matrix(query_vec,
                                             space2.cooccurrence_matrix)
        row_names = space2.id2row
    else:
        row_names = self.id2row
        sims = similarity.get_sims_to_matrix(query_vec,
                                             self.cooccurrence_matrix)

    ranking = sims.sorted_permutation(sims.sum, 1)
    # Never ask for more neighbours than there are rows to choose from.
    limit = min(no_neighbours, len(row_names))

    neighbours = [(row_names[pos], sims[pos, 0])
                  for pos in (ranking[rank] for rank in range(limit))]

    log.print_info(logger, 1, "\nGetting neighbours of:%s" % (word))
    log.print_name(logger, similarity, 1, "Similarity:")
    log.print_time_info(logger, time.time(), started_at, 2)
    return neighbours
def get_neighbours(self, word, no_neighbours, similarity, space2=None):
    """
    Finds the nearest neighbours of a word in a semantic space.

    Args:
        word: string, the word whose neighbours are wanted
        no_neighbours: int, how many neighbours to return
        similarity: of type Similarity, the measure used to score rows
        space2: Space type, Optional. When given, neighbours are taken
        from this space instead of the current one. By default the
        current space is searched.

    Returns:
        list of (neighbour_string, similarity_value) tuples.

    Raises:
        KeyError: if the word is not found in the semantic space.
    """
    t0 = time.time()
    assert_is_instance(similarity, Similarity)

    word_vector = self.get_row(word)

    # Pick the space whose rows are the candidate neighbours.
    search_space = self if space2 is None else space2
    candidates = search_space.cooccurrence_matrix

    if space2 is not None:
        # The word vector comes from this space; align its matrix type
        # with the other space before comparing.
        candidate_type = type(candidates)
        if not isinstance(word_vector, candidate_type):
            word_vector = candidate_type(word_vector)

    similarities = similarity.get_sims_to_matrix(word_vector, candidates)
    labels = search_space.id2row

    order = similarities.sorted_permutation(similarities.sum, 1)
    wanted = min(no_neighbours, len(labels))

    result = []
    for rank in range(wanted):
        row = order[rank]
        label = labels[row]
        score = similarities[row, 0]
        result.append((label, score))

    log.print_info(logger, 1, "\nGetting neighbours of:%s" % (word))
    log.print_name(logger, similarity, 1, "Similarity:")
    log.print_time_info(logger, time.time(), t0, 2)
    return result
def compose(self, data, arg_space):
    """
    Composes the elements listed in data with this composition model.

    Args:
        data: data to be composed. List of tuples, each containing 3
        strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
        elements to be composed and composed_phrase is the string
        associated to their composition.

        arg_space: argument space(s). Space object or a tuple of two
        Space objects (e.g. my_space, or (my_space1, my_space2)).
        If two spaces are provided, arg1 elements of data are
        interpreted in space1, and arg2 in space2.

    Returns:
        composed space: a new object of type Space, containing the
        phrases obtained through composition.
    """
    started_at = time.time()

    first_space, second_space = self.extract_arg_spaces(arg_space)
    row_lookups = (first_space.row2id, second_space.row2id, None)
    first_words, second_words, phrases = self.valid_data_to_lists(
        data, row_lookups)

    first_rows = first_space.get_rows(first_words)
    second_rows = second_space.get_rows(second_words)

    # Both argument matrices must have the same type before composing.
    first_mat, second_mat = resolve_type_conflict([first_rows, second_rows],
                                                  DenseMatrix)

    phrase_mat = self._compose(first_mat, second_mat)

    # The column labels are built on first use and then kept on the model.
    if self.composed_id2column is None:
        self.composed_id2column = self._build_id2column(first_space,
                                                        second_space)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3,
                   "Composed total data points:%s" % first_mat.shape[0])
    log.print_matrix_info(logger, phrase_mat, 4,
                          "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), started_at, 2)

    return Space(phrase_mat, phrases, self.composed_id2column)
def compose(self, data, arg_space):
    """
    Applies the composition model to every (arg1, arg2) pair in data.

    Args:
        data: data to be composed. List of tuples, each containing 3
        strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
        elements to be composed and composed_phrase is the string
        associated to their composition.

        arg_space: argument space(s). Space object or a tuple of two
        Space objects (e.g. my_space, or (my_space1, my_space2)).
        If two spaces are provided, arg1 elements of data are
        interpreted in space1, and arg2 in space2.

    Returns:
        composed space: a new object of type Space, containing the
        phrases obtained through composition.
    """
    t0 = time.time()

    left_space, right_space = self.extract_arg_spaces(arg_space)
    left_ids, right_ids, phrase_ids = self.valid_data_to_lists(
        data, (left_space.row2id, right_space.row2id, None))

    # Fetch the argument rows, then coerce both matrices to one type.
    operands = [left_space.get_rows(left_ids),
                right_space.get_rows(right_ids)]
    left_mat, right_mat = resolve_type_conflict(operands, DenseMatrix)

    composed = self._compose(left_mat, right_mat)

    if self.composed_id2column is None:
        # First composition with this model: derive the column labels from
        # the argument spaces and keep them for later calls.
        self.composed_id2column = self._build_id2column(left_space,
                                                        right_space)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3,
                   "Composed total data points:%s" % left_mat.shape[0])
    log.print_matrix_info(logger, composed, 4,
                          "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), t0, 2)

    return Space(composed, phrase_ids, self.composed_id2column)