def __init__(self, source_col, transform, stage=None):
        """
        Set up a lazily evaluated column standing for `transform` applied to
        `source_col`.

        @param source_col is the Column or LazyColumn being transformed.  Its
                          `original_col` and `identity_hash` attributes are
                          read here.
        @param transform should be a Transform object.  It is passed to the
                         built-in hash() when deriving the identity hash.
        @param stage is an optional string label for this step in a chain of
                     transforms, so that key steps can be found again later.
                     A label must not repeat the stage of any column in
                     self.transformed_ancestors.
        @raise ValueError if `stage` is not None and equals the stage of a
                          column in self.transformed_ancestors.
        """
        self._source_col = source_col
        self._transform = transform
        self._original_col = source_col.original_col

        # DataColumns don't have stage names, so every transformed ancestor
        # is inspected; the first one already carrying this label is kept so
        # it can be named in the error message.
        clash = next(
            (ancestor for ancestor in self.transformed_ancestors
             if (stage is not None) and (stage == ancestor.stage)),
            None)
        if clash is not None:
            template = ("Cannot create LazyColumn with stage '%s' because this "
                        "identifier is used in ancestor column %s ")
            raise ValueError(template % (stage, clash))
        self._stage = stage

        # FIXME: The hashing mechanism for transforms needs work...
        # The identity hash mixes the source column's identity hash with the
        # built-in hash of the transform.
        parent_hash = source_col.identity_hash
        transform_hash = hash(transform)
        self._identity_hash = hashhelper.data_hash(parent_hash, transform_hash)

        # _transformed_col is private on purpose: it carries no hash of its
        # own, and handing it around outside this LazyColumn could hurt
        # performance in ways that are not immediately apparent.
        self._transformed_col = None
    def __init__(self, source_col, transform, stage=None):
        """
        Create a column that represents `source_col` after `transform` has
        been applied, without computing the transformed values yet.

        @param source_col is the original Column or LazyColumn that this
                          object is a transformed version of.  Its
                          `original_col` and `identity_hash` are read here.
        @param transform should be a Transform object; the built-in hash()
                         of it feeds into this column's identity hash.
        @param stage is a string identifier for what this stage of
                     transformation represents, so a series of transforms on
                     a Column can later be searched for key points.  Stage
                     names should be unique among self.transformed_ancestors.
        @raise ValueError if `stage` is not None and an ancestor in
                          self.transformed_ancestors already uses it.
        """
        self._source_col = source_col
        self._transform = transform
        self._original_col = source_col.original_col

        # DataColumns don't have stage names, so search all the way up until
        # the end.  Ancestors that do not collide with the requested stage
        # are skipped; the first collision aborts construction.
        for ancestor in self.transformed_ancestors:
            if (stage is None) or not (stage == ancestor.stage):
                continue
            message = ("Cannot create LazyColumn with stage '%s' "
                       "because this identifier is used in ancestor"
                       " column %s ")
            raise ValueError(message % (stage, ancestor))
        self._stage = stage

        # FIXME: The hashing mechanism for transforms needs work...
        source_identity = source_col.identity_hash
        self._identity_hash = hashhelper.data_hash(source_identity,
                                                   hash(transform))

        # _transformed_col says: Don't look at me from outside!  It has no
        # hash, and passing it around outside of this LazyColumn could have
        # bad and not immediately apparent effects on performance.
        self._transformed_col = None
 def value_hash(self):
     """
     Return the hash of this DataColumn's row values, computing it on the
     first call and reusing the stored result afterwards.

     The hash comes from hashhelper.data_hash(self._row_values); the
     original note describes it as a SHA1 hash.
     """
     cached = self._value_hash
     if cached is not None:
         # Already computed on an earlier call.
         return cached
     self._value_hash = hashhelper.data_hash(self._row_values)
     return self._value_hash
 def value_hash(self):
     """
     Return the hash of the contents of this DataColumn.

     The value is produced by hashhelper.data_hash over self._row_values
     (described in the original note as SHA1) and is stored in
     self._value_hash so later calls skip the recomputation.
     """
     needs_compute = self._value_hash is None
     if needs_compute:
         # First request: hash the row values and remember the result.
         digest = hashhelper.data_hash(self._row_values)
         self._value_hash = digest
     return self._value_hash
示例#5
0
 def param_hash(self):
     """
     Return the hash of this object's parameters, i.e. the result of
     hashhelper.data_hash applied to self._value_mapping.
     """
     mapping = self._value_mapping
     return hashhelper.data_hash(mapping)
示例#6
0
 def hash(self):
     """
     Return a hash identifying this object by both its class and its
     parameters.

     Combines hashhelper.source_hash(self.__class__) with self.param_hash()
     through hashhelper.data_hash.
     """
     # NOTE(review): source_hash presumably hashes the class's source code --
     # confirm in hashhelper.
     class_part = hashhelper.source_hash(self.__class__)
     param_part = self.param_hash()
     return hashhelper.data_hash(class_part, param_part)