Exemplo n.º 1
0
    def apply(self, transformation):
        """
        Applies a transformation and returns the resulting space.

        The transformation is turned into an operation, which is applied
        on the co-occurrence matrix of this space. Row indexing structures
        are copied over. Column indexing structures are emptied after a
        dimensionality reduction, restricted to the selected columns after
        a feature selection, and copied over otherwise. The operation is
        appended to a copy of the list of operations of this space.

        Args:
            transformation: of type Scaling, DimensionalityReduction or
              FeatureSelection

        Returns:
            A new space on which the transformation has been applied.

        """
        started = time.time()
        accepted_types = (Scaling, DimensionalityReduction, FeatureSelection)
        assert_is_instance(transformation, accepted_types)

        operation = transformation.create_operation()
        result_matrix = operation.apply(self.cooccurrence_matrix)

        # The new space gets its own list of operations.
        operations = list(self.operations) + [operation]

        rows = list(self.id2row)
        row_index = self.row2id.copy()

        if isinstance(operation, DimensionalityReductionOperation):
            self.assert_1dim_element()
            # No column labels are kept after a dimensionality reduction.
            columns, column_index = [], {}
        elif isinstance(operation, FeatureSelectionOperation):
            self.assert_1dim_element()
            # The operation records the column labels it was applied on.
            operation.original_columns = self.id2column

            if not operation.original_columns:
                columns, column_index = [], {}
            else:
                labels = array(operation.original_columns)
                columns = list(labels[operation.selected_columns])
                column_index = list2dict(columns)
        else:
            columns = list(self.id2column)
            column_index = self.column2id.copy()

        log.print_transformation_info(
            logger, transformation, 1, "\nApplied transformation:")
        log.print_matrix_info(
            logger, self.cooccurrence_matrix, 2, "Original semantic space:")
        log.print_matrix_info(
            logger, result_matrix, 2, "Resulted semantic space:")
        log.print_time_info(logger, time.time(), started, 2)

        return Space(result_matrix, rows, columns, row_index, column_index,
                     operations=operations)
Exemplo n.º 2
0
    def test_list2dict(self):
        """Checks list2dict on valid lists and on a list with a duplicate."""
        expected_by_input = (
            (["a", "v", "d"], {"a": 0, "v": 1, "d": 2}),
            ([], {}),
        )

        for elements, mapping in expected_by_input:
            self.assertDictEqual(list2dict(elements), mapping)

        # A repeated element is expected to be rejected.
        with self.assertRaises(ValueError):
            list2dict(["a", "v", "a"])
Exemplo n.º 3
0
    def test_list2dict(self):
        """Checks the mapping built by list2dict and its duplicate check."""
        # Each element is expected to be mapped to its position in the list.
        self.assertDictEqual(list2dict(["a", "v", "d"]),
                             {"a": 0, "v": 1, "d": 2})
        # An empty list is expected to give an empty dictionary.
        self.assertDictEqual(list2dict([]), {})

        # A list holding the same element twice is expected to be rejected.
        self.assertRaises(ValueError, list2dict, ["a", "v", "a"])
Exemplo n.º 4
0
    def apply(self, transformation):
        """
        Returns a new space obtained by applying a transformation.

        An operation is created from the transformation and applied on the
        data matrix. The row indexing structures of the new space are
        copies of the current ones. The column indexing structures depend
        on the kind of operation: they are empty for a dimensionality
        reduction, they hold only the selected columns for a feature
        selection, and they are copies of the current ones otherwise.
        The new space holds the current operations plus the new one.

        Args:
            transformation: of type Scaling, DimensionalityReduction or
              FeatureSelection

        Returns:
            A new space on which the transformation has been applied.

        """
        t_start = time.time()
        assert_is_instance(transformation, (Scaling, DimensionalityReduction,
                                            FeatureSelection))
        applied_op = transformation.create_operation()
        transformed_matrix = applied_op.apply(self.cooccurrence_matrix)

        op_history = list(self.operations)
        op_history.append(applied_op)

        new_id2row = list(self.id2row)
        new_row2id = self.row2id.copy()

        if isinstance(applied_op, DimensionalityReductionOperation):
            self.assert_1dim_element()
            new_id2column, new_column2id = [], {}
        elif isinstance(applied_op, FeatureSelectionOperation):
            self.assert_1dim_element()
            applied_op.original_columns = self.id2column

            # Start from empty structures; fill them only when the
            # original space had column labels.
            new_id2column, new_column2id = [], {}
            if applied_op.original_columns:
                new_id2column = list(
                    array(applied_op.original_columns)[
                        applied_op.selected_columns])
                new_column2id = list2dict(new_id2column)
        else:
            new_id2column = list(self.id2column)
            new_column2id = self.column2id.copy()

        log.print_transformation_info(logger, transformation, 1,
                                      "\nApplied transformation:")
        log.print_matrix_info(logger, self.cooccurrence_matrix, 2,
                              "Original semantic space:")
        log.print_matrix_info(logger, transformed_matrix, 2,
                              "Resulted semantic space:")
        log.print_time_info(logger, time.time(), t_start, 2)

        return Space(transformed_matrix, new_id2row, new_id2column,
                     new_row2id, new_column2id, operations=op_history)
Exemplo n.º 5
0
def train_all_spaces(core_space, an_dn_space, pn_space, sv_space, vo_space):
    """
    Trains one space per phrase type and stacks them into a single space.

    The core space is first transformed with RowNormalization. Four spaces
    are then trained with train_one_space, one for each of the given
    phrase spaces. The rows of the spaces trained on vo_space and sv_space
    are renamed by appending ".objmat" and ".subjmat" respectively, and
    all four spaces are stacked with Space.vstack.

    Args:
        core_space: the space passed, after row normalization, to every
            training call
        an_dn_space: space used for the "adj, det" training
        pn_space: space used for the "prep" training
        sv_space: space used for the "sv" training
        vo_space: space used for the "vo" training

    Returns:
        The space obtained by stacking the four trained spaces.
    """
    core_space = core_space.apply(RowNormalization())

    # NOTE(review): the meaning of the two trailing integer arguments is
    # defined by train_one_space, which is not visible here.
    # The parenthesised single-argument print works on Python 2 and 3.
    print("train adj, det")
    a_d_space = train_one_space(core_space, an_dn_space, 0, 3)
    print("train prep")
    prep_space = train_one_space(core_space, pn_space, 1, 3)
    print("train vo")
    v_obj_space = train_one_space(core_space, vo_space, 0, 4)
    print("train sv")
    v_subj_space = train_one_space(core_space, sv_space, 1, 4)

    # Rename the rows of both verb spaces by appending a suffix, and
    # rebuild the matching row-to-id dictionary.
    for space, suffix in ((v_obj_space, ".objmat"),
                          (v_subj_space, ".subjmat")):
        renamed_rows = [row + suffix for row in space.id2row]
        space._id2row = renamed_rows
        space._row2id = list2dict(renamed_rows)

    all_mat_space = Space.vstack(a_d_space, prep_space)
    all_mat_space = Space.vstack(v_obj_space, all_mat_space)
    all_mat_space = Space.vstack(v_subj_space, all_mat_space)
    return all_mat_space
Exemplo n.º 6
0
    def _project_core_operations(self, matrix_):
        """
        Projects the operations held by this space on a matrix.

        The operations are projected one after the other, in the order in
        which they are stored. The column indexing structures of this
        space are updated along the way: they are emptied for a
        dimensionality reduction, and rebuilt from the selected columns
        for a feature selection (or emptied if the operation holds no
        original columns).

        Args:
            matrix_: the matrix on which the operations are projected

        Returns:
            The matrix obtained after projecting all the operations.
        """
        projected = matrix_
        for op in self._operations:
            if isinstance(op, DimensionalityReductionOperation):
                self._id2column = []
                self._column2id = {}

            if isinstance(op, FeatureSelectionOperation):
                if not op.original_columns:
                    self._id2column = []
                    self._column2id = {}
                else:
                    labels = array(op.original_columns)
                    self._id2column = list(labels[op.selected_columns])
                    self._column2id = list2dict(self._id2column)

            projected = op.project(projected)
        return projected
Exemplo n.º 7
0
    def _project_core_operations(self, matrix_):
        """
        Applies the projection of every stored operation on a matrix.

        While iterating over the operations, the column indexing
        structures of this space are kept in step with them: a
        dimensionality reduction empties them, a feature selection
        replaces them with the selected subset of the original columns of
        the operation (or empties them when it has no original columns).

        Args:
            matrix_: the matrix to be projected

        Returns:
            The result of chaining the projections of all operations.
        """
        for core_op in self._operations:
            if isinstance(core_op, DimensionalityReductionOperation):
                self._id2column, self._column2id = [], {}

            if isinstance(core_op, FeatureSelectionOperation):
                # Default to empty structures; they are overwritten below
                # when the operation knows the original column labels.
                selected_labels, label_index = [], {}
                if core_op.original_columns:
                    selected_labels = list(
                        array(core_op.original_columns)[
                            core_op.selected_columns])
                    self._id2column = selected_labels
                    label_index = list2dict(self._id2column)
                else:
                    self._id2column = selected_labels
                self._column2id = label_index

            matrix_ = core_op.project(matrix_)
        return matrix_
Exemplo n.º 8
0
 def __init__(self, core_space, matrix_, id2row, row2id=None):
     """
     Constructor.

     Builds a space whose column indexing structures and operations are
     taken over from a core space, and whose data matrix is obtained by
     projecting the operations of the core space on the given matrix.

     Args:
         core_space: Space type, the core space that this is peripheral to.
         matrix_: Matrix type, the data matrix of the space
         id2row: list, the row elements
         row2id: dictionary, maps row strings to ids. Optional, built from
             id2row by default.

     Returns:
         A peripheral semantic space (type PeripheralSpace) on which the
         core space operations have been projected.

     Raises:
         TypeError: if matrix_ or core_space are not of the correct type
         ValueError: if element shape is not consistent with
                      the size of matrix rows
                     if the matrix and the provided row and column
                      indexing structures are not of consistent shapes.
     """
     assert_is_instance(matrix_, Matrix)
     assert_is_instance(core_space, Space)
     assert_is_instance(id2row, list)
     # TODO: assert it is not a peripheral space here!

     if row2id is not None:
         assert_dict_match_list(row2id, id2row)
     else:
         row2id = list2dict(id2row)

     core_column2id = core_space.column2id
     core_id2column = core_space.id2column

     # The list of operations is copied; the column structures are shared
     # with the core space until the projection below replaces them.
     self._operations = list(core_space.operations)
     self._row2id = row2id
     self._id2row = id2row
     self._column2id = core_column2id
     self._id2column = core_id2column

     projected = self._project_core_operations(matrix_)
     self._cooccurrence_matrix = projected
     assert_shape_consistent(self.cooccurrence_matrix, self._id2row,
                             self._id2column, self._row2id, self._column2id)

     no_cols = self._cooccurrence_matrix.shape[1]
     self._element_shape = (no_cols,)
Exemplo n.º 9
0
    def __init__(self, core_space, matrix_, id2row, row2id=None):
        """
        Constructor.

        The new space takes over the operations and the column indexing
        structures of the core space. Its data matrix is the result of
        projecting the core space operations on matrix_.

        Args:
            core_space: Space type, the core space that this is peripheral to.
            matrix_: Matrix type, the data matrix of the space
            id2row: list, the row elements
            row2id: dictionary, maps row strings to ids. Optional, built from
                id2row by default.

        Returns:
            A peripheral semantic space (type PeripheralSpace) on which the
            core space operations have been projected.

        Raises:
            TypeError: if matrix_ or core_space are not of the correct type
            ValueError: if element shape is not consistent with
                         the size of matrix rows
                        if the matrix and the provided row and column
                         indexing structures are not of consistent shapes.
        """
        # Validate the argument types first.
        assert_is_instance(matrix_, Matrix)
        assert_is_instance(core_space, Space)
        assert_is_instance(id2row, list)
        # TODO: assert it is not a peripheral space here!

        # Either derive the row dictionary or check the one provided.
        if row2id is None:
            row_index = list2dict(id2row)
        else:
            assert_dict_match_list(row2id, id2row)
            row_index = row2id

        column_index = core_space.column2id
        columns = core_space.id2column

        self._operations = list(core_space.operations)
        self._row2id, self._id2row = row_index, id2row
        self._column2id, self._id2column = column_index, columns

        # Projecting may replace the column structures assigned above.
        self._cooccurrence_matrix = self._project_core_operations(matrix_)
        assert_shape_consistent(
            self.cooccurrence_matrix,
            self._id2row,
            self._id2column,
            self._row2id,
            self._column2id)

        self._element_shape = (self._cooccurrence_matrix.shape[1],)
Exemplo n.º 10
0
    def __init__(self, matrix_, id2row, id2column, row2id=None, column2id=None,
                 **kwargs):
        """
        Constructor.

        Args:
            matrix_: Matrix type, the data matrix of the space
            id2row: list, the row elements
            id2column: list, the column elements
            row2id: dictionary, maps row strings to ids. Optional, built from
                id2row by default.
            column2id: dictionary, maps col strings to ids. Optional, built
                from id2column by default
            operations: list of operations already performed on the input
                matrix, Optional, by default set to empty.
            element_shape: tuple of int, the shape on row elements. Optional,
                by default row elements are one-dimensional and element_shape
                is (no_cols, ). Used in 3D composition.

        Returns:
            A semantic space (type Space)

        Raises:
            TypeError: if matrix_ is not of the correct type
            ValueError: if element shape is not consistent with
                        the size of matrix rows
                        if the matrix and the provided row and column
                        indexing structures are not of consistent shapes.

        """
        assert_is_instance(matrix_, Matrix)
        assert_valid_kwargs(kwargs, ["operations", "element_shape"])
        assert_is_instance(id2row, list)
        assert_is_instance(id2column, list)

        # Build the dictionaries that were not provided, check the others.
        if row2id is not None:
            assert_dict_match_list(row2id, id2row)
        else:
            row2id = list2dict(id2row)

        if column2id is not None:
            assert_dict_match_list(column2id, id2column)
        else:
            column2id = list2dict(id2column)

        assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id)

        self._cooccurrence_matrix = matrix_
        self._row2id = row2id
        self._id2row = id2row
        self._column2id = column2id
        self._id2column = id2column
        self._operations = kwargs.get("operations", [])

        no_cols = self._cooccurrence_matrix.shape[1]
        if "element_shape" not in kwargs:
            self._element_shape = (no_cols,)
        else:
            elem_shape = kwargs["element_shape"]
            # The element shape must account for every matrix column.
            if prod(elem_shape) != no_cols:
                raise ValueError("Trying to assign invalid element shape:\
                                    element_shape: %s, matrix columns: %s"
                                 % (str(elem_shape), str(no_cols)))

            # NOTE: watch out here, can cause bugs, if we change the
            # dimension of a regular space and we do not create a new space
            self._element_shape = elem_shape
Exemplo n.º 11
0
    def __init__(self,
                 matrix_,
                 id2row,
                 id2column,
                 row2id=None,
                 column2id=None,
                 operations=None,
                 element_shape=None):
        """
        Constructor.

        Args:
            matrix_: Matrix type, the data matrix of the space
            id2row: list, the row elements
            id2column: list, the column elements
            row2id: dictionary, maps row strings to ids. Optional, built from
                id2row by default.
            column2id: dictionary, maps col strings to ids. Optional, built
                from id2column by default
            operations: list of operations already performed on the input
                matrix. Optional; when omitted (None), the space gets its
                own new empty list. A list that is passed in is stored as
                is, not copied.
            element_shape: tuple of int, the shape on row elements. Optional,
                by default row elements are one-dimensional and element_shape
                is (no_cols, ). Used in 3D composition.

        Returns:
            A semantic space (type Space)

        Raises:
            TypeError: if matrix_ is not of the correct type
            ValueError: if element shape is not consistent with
                        the size of matrix rows
                        if the matrix and the provided row and column
                        indexing structures are not of consistent shapes.

        """
        assert_is_instance(matrix_, Matrix)
        assert_is_instance(id2row, list)
        assert_is_instance(id2column, list)

        if row2id is None:
            row2id = list2dict(id2row)
        else:
            assert_dict_match_list(row2id, id2row)

        if column2id is None:
            column2id = list2dict(id2column)
        else:
            assert_dict_match_list(column2id, id2column)

        assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id)

        self._cooccurrence_matrix = matrix_
        self._row2id = row2id
        self._id2row = id2row
        self._column2id = column2id
        self._id2column = id2column
        # A literal [] default is evaluated once, when the function is
        # defined, and would be shared by every space created without an
        # explicit list of operations. Create a new list per instance.
        self._operations = [] if operations is None else operations

        no_cols = self._cooccurrence_matrix.shape[1]
        if element_shape:
            if prod(element_shape) != no_cols:
                # Adjacent literals are used instead of a backslash inside
                # the string, which put the indentation into the message.
                raise ValueError(
                    "Trying to assign invalid element shape: "
                    "element_shape: %s, matrix columns: %s"
                    % (str(element_shape), str(no_cols)))

            # NOTE: watch out here, can cause bugs, if we change the
            # dimension of a regular space and we do not create a new space
            self._element_shape = element_shape
        else:
            self._element_shape = (no_cols,)