Пример #1
0
    def __init__(self, w, categoryList, name="category", verbosity=0):
        """Build a category encoder on top of a non-periodic ScalarEncoder.

        Every category is mapped to an integer index. Index 0 is reserved for
        the "<UNKNOWN>" category; the entries of categoryList receive indices
        1..len(categoryList) in the order they appear. If a category occurs
        more than once, categoryToIndex keeps its last index.

        Parameters:
          w            -- forwarded to ScalarEncoder as its first argument; the
                          total width is expected to be w * (number of
                          categories, including "unknown")
          categoryList -- sized iterable of category values; each value is
                          used as a dictionary key
          name         -- stored as self.name and used in self.description
          verbosity    -- stored as self.verbosity
        """

        self.encoders = None
        self.verbosity = verbosity

        # number of categories includes "unknown"
        self.ncategories = len(categoryList) + 1

        # Two-way mapping between category values and their integer indices.
        # enumerate() replaces the Python-2-only xrange(len(...)) index loop.
        self.categoryToIndex = dict()
        self.indexToCategory = {0: "<UNKNOWN>"}
        for index, category in enumerate(categoryList, 1):
            self.categoryToIndex[category] = index
            self.indexToCategory[index] = category

        self.encoder = ScalarEncoder(w,
                                     minval=0,
                                     maxval=self.ncategories - 1,
                                     radius=1,
                                     periodic=False)
        self.width = w * self.ncategories
        # Sanity check: the underlying encoder must use w bits per category
        assert self.encoder.getWidth() == self.width

        self.description = [(name, 0)]
        self.name = name

        # These are used to support the topDownCompute method
        self._topDownMappingM = None

        # This gets filled in by getBucketValues
        self._bucketValues = None
Пример #2
0
    def __init__(self,
                 w=5,
                 resolution=1.0,
                 minval=0.10,
                 maxval=10000,
                 name="log",
                 verbosity=0):
        """Set up a log-scale encoder backed by a non-periodic ScalarEncoder.

        The requested range is converted to scaled units of 10 * log10(value).
        The lower bound is truncated with int() and the upper bound is rounded
        up with ceil(). self.minval and self.maxval are then recomputed from
        those integer bounds, so they can differ from the arguments passed in.

        Parameters:
          w          -- forwarded to ScalarEncoder
          resolution -- forwarded to ScalarEncoder after conversion to float
          minval     -- lower end of the input range (math.log10 is applied)
          maxval     -- upper end of the input range (math.log10 is applied)
          name       -- stored as self.name and used in self.description
          verbosity  -- stored as self.verbosity
        """

        self.encoders = None
        self.verbosity = verbosity

        # Integer bounds of the range in scaled (10 * log10) units
        lowScaled = int(10 * math.log10(minval))
        self.minScaledValue = lowScaled
        highScaled = int(math.ceil(10 * math.log10(maxval)))
        self.maxScaledValue = highScaled
        assert highScaled > lowScaled

        # Map the integer bounds back to the original scale
        self.minval = 10 ** (lowScaled / 10.0)
        self.maxval = 10 ** (highScaled / 10.0)

        # Note: passing resolution=1 causes the topDownCompute test to fail.
        # Fixed for now by always converting to float, but the root cause
        # should still be found.
        scalarArgs = dict(w=w,
                          minval=lowScaled,
                          maxval=highScaled,
                          periodic=False,
                          resolution=float(resolution))
        self.encoder = ScalarEncoder(**scalarArgs)
        self.width = self.encoder.getWidth()

        self.name = name
        self.description = [(name, 0)]

        # Created by getBucketValues() the first time it is called, and
        # re-created whenever our buckets would be re-arranged.
        self._bucketValues = None
Пример #3
0
    def __init__(self,
                 season=0,
                 dayOfWeek=0,
                 weekend=0,
                 holiday=0,
                 timeOfDay=0,
                 customDays=0,
                 name=''):
        """Assemble the sub-encoders selected by the non-zero arguments.

        Each enabled field gets its own ScalarEncoder. The encoders are laid
        out back to back in this fixed order: season, day of week, weekend,
        custom days, holiday, time of day. For every enabled field the
        encoder, its offset, a description entry and an entry in
        self.encoders are recorded; disabled fields leave their encoder
        attribute set to None.

        Parameters (a value of 0 disables the field):
          season     -- w, or an indexable (w, radius); radius defaults to 91.5
          dayOfWeek  -- w, or an indexable (w, radius); radius defaults to 1
          weekend    -- w, or an indexable (w, radius); radius defaults to 1
          holiday    -- w; the radius is always 1
          timeOfDay  -- w, or an indexable (w, radius); radius defaults to 4
          customDays -- two-item (w, days), where days is either a list of
                        day names or a single day-name string; names are
                        matched case-insensitively ("mon"/"monday", ...)
          name       -- stored as self.name
        """

        def splitSpec(spec, defaultRadius):
            # An indexable spec is read as (w, radius); anything else is a
            # bare w that takes the default radius.
            if hasattr(spec, "__getitem__"):
                return spec[0], spec[1]
            return spec, defaultRadius

        def place(label, encoder):
            # Put the encoder at the current end of the layout, record it,
            # and hand back the offset it was placed at.
            offset = self.width
            self.width += encoder.getWidth()
            self.description.append((label, offset))
            self.encoders.append((label, encoder, offset))
            return offset

        self.width = 0
        self.description = []
        self.name = name

        # List of (name, encoder, offset) tuples for use by the decode()
        # method
        self.encoders = []

        # --- season -------------------------------------------------------
        # Ignore leap-year differences -- assume 366 days in a year.
        # Default radius = 91.5 days = length of a season.
        # Value is the number of days since the beginning of the year.
        self.seasonEncoder = None
        if season != 0:
            seasonW, seasonRadius = splitSpec(season, 91.5)
            self.seasonEncoder = ScalarEncoder(w=seasonW,
                                               minval=0,
                                               maxval=366,
                                               radius=seasonRadius,
                                               periodic=True,
                                               name="season")
            self.seasonOffset = place("season", self.seasonEncoder)

        # --- day of week --------------------------------------------------
        # Value is day of week (floating point); default radius is 1 day.
        self.dayOfWeekEncoder = None
        if dayOfWeek != 0:
            dowW, dowRadius = splitSpec(dayOfWeek, 1)
            self.dayOfWeekEncoder = ScalarEncoder(w=dowW,
                                                  minval=0,
                                                  maxval=7,
                                                  radius=dowRadius,
                                                  periodic=True,
                                                  name="day of week")
            self.dayOfWeekOffset = place("day of week", self.dayOfWeekEncoder)

        # --- weekend ------------------------------------------------------
        # Binary value. Not sure if this makes sense; it is also somewhat
        # redundant with dayOfWeek. A radius of 1 is used when none is given.
        self.weekendEncoder = None
        if weekend != 0:
            weekendW, weekendRadius = splitSpec(weekend, 1)
            self.weekendEncoder = ScalarEncoder(w=weekendW,
                                                minval=0,
                                                maxval=1,
                                                periodic=False,
                                                radius=weekendRadius,
                                                name="weekend")
            self.weekendOffset = place("weekend", self.weekendEncoder)

        # --- custom days --------------------------------------------------
        # First item of the tuple is the width, second is either a single
        # day of the week or a list of the days to be encoded as ones.
        self.customDaysEncoder = None
        if customDays != 0:
            customDayEncoderName = ""
            daysToParse = []
            assert len(
                customDays) == 2, "Please provide a w and the desired days"
            daySpec = customDays[1]
            if isinstance(daySpec, list):
                # Encoder name is every day followed by a single space
                customDayEncoderName = "".join(
                    str(day) + " " for day in daySpec)
                daysToParse = daySpec
            elif isinstance(daySpec, str):
                customDayEncoderName = customDayEncoderName + daySpec
                daysToParse = [daySpec]
            else:
                assert False, "You must provide either a list of days or a single day"

            # Translate day names into day numbers (Monday = 0 ... Sunday = 6)
            dayNumbers = {
                "mon": 0, "monday": 0,
                "tue": 1, "tuesday": 1,
                "wed": 2, "wednesday": 2,
                "thu": 3, "thursday": 3,
                "fri": 4, "friday": 4,
                "sat": 5, "saturday": 5,
                "sun": 6, "sunday": 6,
            }
            self.customDays = []
            for day in daysToParse:
                dayKey = day.lower()
                if dayKey in dayNumbers:
                    self.customDays.append(dayNumbers[dayKey])
                else:
                    assert False, "Unable to understand %s as a day of week" % str(
                        day)

            self.customDaysEncoder = ScalarEncoder(w=customDays[0],
                                                   minval=0,
                                                   maxval=1,
                                                   periodic=False,
                                                   radius=1,
                                                   name=customDayEncoderName)
            self.customDaysOffset = place("customdays", self.customDaysEncoder)

        # --- holiday ------------------------------------------------------
        # A "continuous" binary value: 1 on the holiday itself, with a smooth
        # ramp 0->1 on the day before and 1->0 on the day after the holiday.
        self.holidayEncoder = None
        if holiday != 0:
            self.holidayEncoder = ScalarEncoder(w=holiday,
                                                minval=0,
                                                maxval=1,
                                                periodic=False,
                                                radius=1,
                                                name="holiday")
            self.holidayOffset = place("holiday", self.holidayEncoder)

        # --- time of day --------------------------------------------------
        # Value is time of day in hours. Default radius = 4 hours, e.g.
        # morning, afternoon, evening, early night, late night, etc.
        self.timeOfDayEncoder = None
        if timeOfDay != 0:
            todW, todRadius = splitSpec(timeOfDay, 4)
            self.timeOfDayEncoder = ScalarEncoder(w=todW,
                                                  minval=0,
                                                  maxval=24,
                                                  periodic=True,
                                                  radius=todRadius,
                                                  name="time of day")
            self.timeOfDayOffset = place("time of day", self.timeOfDayEncoder)