def __init__(self, w, categoryList, name="category", verbosity=0):
    """Build a category encoder on top of a single ScalarEncoder.

    Each entry of ``categoryList`` is assigned the integer index
    ``position + 1``; index 0 is reserved for the "<UNKNOWN>" category.
    Those integer indices are what the underlying non-periodic
    ScalarEncoder (range ``[0, ncategories - 1]``, radius 1) encodes.

    Args:
        w: Forwarded as the first positional argument of ScalarEncoder.
            The total output width is expected to equal
            ``w * ncategories`` (checked by an assertion below).
        categoryList: Sized iterable of hashable category values. If a
            value appears more than once, the last occurrence wins in
            ``categoryToIndex``.
        name: Stored as ``self.name`` and used as the single entry of
            ``self.description``.
        verbosity: Stored as ``self.verbosity``.

    Raises:
        AssertionError: If the width reported by the ScalarEncoder does not
            equal ``w * ncategories``.
    """
    self.encoders = None
    self.verbosity = verbosity

    # The number of categories includes the reserved "unknown" slot.
    self.ncategories = len(categoryList) + 1

    self.categoryToIndex = dict()
    self.indexToCategory = dict()
    self.indexToCategory[0] = "<UNKNOWN>"
    # enumerate(..., 1) yields the same 1-based indices as the former
    # xrange(len(...)) loop, without relying on the Python-2-only xrange.
    for index, category in enumerate(categoryList, 1):
        self.categoryToIndex[category] = index
        self.indexToCategory[index] = category

    self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1,
                                 radius=1, periodic=False)
    self.width = w * self.ncategories
    # Sanity check that the scalar encoder allocated w bits per category.
    assert self.encoder.getWidth() == self.width

    self.description = [(name, 0)]
    self.name = name

    # These are used to support the topDownCompute method.
    self._topDownMappingM = None

    # This gets filled in by getBucketValues.
    self._bucketValues = None
def __init__(self, w=5, resolution=1.0, minval=0.10, maxval=10000,
             name="log", verbosity=0):
    """Build a logarithmic encoder on top of a single ScalarEncoder.

    The requested ``[minval, maxval]`` range is mapped onto the integer
    scale ``10 * log10(value)``. The lower bound is rounded down and the
    upper bound is rounded up, so the effective range stored in
    ``self.minval`` / ``self.maxval`` always contains the requested one.
    The underlying non-periodic ScalarEncoder works on the scaled bounds.

    Args:
        w: Forwarded to ScalarEncoder as ``w``.
        resolution: Forwarded to ScalarEncoder as ``resolution`` after
            conversion to ``float``.
        minval: Smallest value to cover; must be positive because its
            base-10 logarithm is taken.
        maxval: Largest value to cover; must be positive.
        name: Stored as ``self.name`` and used as the single entry of
            ``self.description``.
        verbosity: Stored as ``self.verbosity``.

    Raises:
        ValueError: From ``math.log10`` if ``minval`` or ``maxval`` is not
            positive.
        AssertionError: If the scaled maximum is not strictly greater than
            the scaled minimum.
    """
    self.encoders = None
    self.verbosity = verbosity

    # Round the lower bound DOWN. A bare int() truncates toward zero, which
    # for minval < 1 (negative logarithm) would round the bound up and
    # leave the requested minimum outside the encoder's range.
    self.minScaledValue = int(math.floor(10 * math.log10(minval)))
    self.maxScaledValue = int(math.ceil(10 * math.log10(maxval)))
    assert self.maxScaledValue > self.minScaledValue

    # Effective bounds after snapping to the integer scaled grid.
    self.minval = 10 ** (self.minScaledValue / 10.0)
    self.maxval = 10 ** (self.maxScaledValue / 10.0)

    # Note: passing resolution=1 (an int) causes the topDownCompute test to
    # fail. Fixed for now by always converting to float, but the root cause
    # should still be found.
    self.encoder = ScalarEncoder(w=w,
                                 minval=self.minScaledValue,
                                 maxval=self.maxScaledValue,
                                 periodic=False,
                                 resolution=float(resolution))
    self.width = self.encoder.getWidth()
    self.description = [(name, 0)]
    self.name = name

    # This list is created by getBucketValues() the first time it is called,
    # and re-created whenever our buckets would be re-arranged.
    self._bucketValues = None
def __init__(self, season=0, dayOfWeek=0, weekend=0, holiday=0, timeOfDay=0,
             customDays=0, name=''):
    """Assemble the optional sub-encoders that make up a date encoding.

    Every argument except ``name`` enables one ScalarEncoder when it is not
    equal to 0. Enabled sub-encoders are laid out in a fixed order --
    season, day of week, weekend, custom days, holiday, time of day -- and
    each one is recorded in ``self.description`` and ``self.encoders``
    together with its starting offset; ``self.width`` accumulates the total.

    Args:
        season: ``w`` or an indexable ``(w, radius)``. Default radius 91.5
            (days); range 0..366, periodic.
        dayOfWeek: ``w`` or an indexable ``(w, radius)``. Default radius 1
            (day); range 0..7, periodic.
        weekend: ``w`` or an indexable ``(w, radius)``. Default radius 1;
            range 0..1, not periodic.
        holiday: ``w`` only (radius fixed at 1); range 0..1, not periodic.
        timeOfDay: ``w`` or an indexable ``(w, radius)``. Default radius 4
            (hours); range 0..24, periodic.
        customDays: Two-item ``(w, days)`` where ``days`` is either a list
            of day names or a single day-name string. Names are matched
            case-insensitively against "mon"/"monday" ... "sun"/"sunday";
            the parsed indices (Monday = 0) are stored in
            ``self.customDays``.
        name: Stored as ``self.name``.

    Raises:
        AssertionError: If ``customDays`` does not have exactly two items,
            if its second item is neither a list nor a str, or if a day
            name is not recognised.
    """
    self.width = 0
    self.description = []
    self.name = name

    # List of (name, encoder, offset) tuples for use by the decode() method.
    self.encoders = []

    def widthAndRadius(spec, fallbackRadius):
        # An indexable spec carries (w, radius); a bare value is just w.
        if hasattr(spec, "__getitem__"):
            return spec[0], spec[1]
        return spec, fallbackRadius

    def register(label, encoder):
        # Append the encoder at the current end of the output and return
        # the offset at which it starts.
        offset = self.width
        self.width += encoder.getWidth()
        self.description.append((label, offset))
        self.encoders.append((label, encoder, offset))
        return offset

    # --- Season -----------------------------------------------------------
    # Leap-year differences are ignored: a year is assumed to have 366 days.
    # The value is the number of days since the beginning of the year
    # (0 - 365) and the default radius is the length of a season.
    self.seasonEncoder = None
    if season != 0:
        bits, spread = widthAndRadius(season, 91.5)
        self.seasonEncoder = ScalarEncoder(w=bits, minval=0, maxval=366,
                                           radius=spread, periodic=True,
                                           name="season")
        self.seasonOffset = register("season", self.seasonEncoder)

    # --- Day of week ------------------------------------------------------
    # The value is the (floating point) day of week; default radius 1 day.
    self.dayOfWeekEncoder = None
    if dayOfWeek != 0:
        bits, spread = widthAndRadius(dayOfWeek, 1)
        self.dayOfWeekEncoder = ScalarEncoder(w=bits, minval=0, maxval=7,
                                              radius=spread, periodic=True,
                                              name="day of week")
        self.dayOfWeekOffset = register("day of week",
                                        self.dayOfWeekEncoder)

    # --- Weekend ----------------------------------------------------------
    # Binary value. Not sure if this makes sense; it is also somewhat
    # redundant with dayOfWeek. A radius of 1 is used if none was provided.
    self.weekendEncoder = None
    if weekend != 0:
        bits, spread = widthAndRadius(weekend, 1)
        self.weekendEncoder = ScalarEncoder(w=bits, minval=0, maxval=1,
                                            periodic=False, radius=spread,
                                            name="weekend")
        self.weekendOffset = register("weekend", self.weekendEncoder)

    # --- Custom days ------------------------------------------------------
    # The first item of the tuple is the width; the second is either a
    # single day of the week or a list of the days to be encoded as ones.
    self.customDaysEncoder = None
    if customDays != 0:
        encoderLabel = ""
        dayNames = []
        assert len(
            customDays) == 2, "Please provide a w and the desired days"
        requested = customDays[1]
        if isinstance(requested, list):
            # Every name is followed by a space, including the last one.
            encoderLabel = "".join(str(day) + " " for day in requested)
            dayNames = requested
        elif isinstance(requested, str):
            encoderLabel = requested
            dayNames = [requested]
        else:
            assert False, "You must provide either a list of days or a single day"

        # Translate the day names into indices, Monday being 0.
        dayIndexByName = {
            "mon": 0, "monday": 0,
            "tue": 1, "tuesday": 1,
            "wed": 2, "wednesday": 2,
            "thu": 3, "thursday": 3,
            "fri": 4, "friday": 4,
            "sat": 5, "saturday": 5,
            "sun": 6, "sunday": 6,
        }
        self.customDays = []
        for day in dayNames:
            dayIndex = dayIndexByName.get(day.lower())
            if dayIndex is None:
                assert False, "Unable to understand %s as a day of week" % str(
                    day)
            else:
                self.customDays.append(dayIndex)

        self.customDaysEncoder = ScalarEncoder(w=customDays[0], minval=0,
                                               maxval=1, periodic=False,
                                               radius=1, name=encoderLabel)
        self.customDaysOffset = register("customdays",
                                         self.customDaysEncoder)

    # --- Holiday ----------------------------------------------------------
    # A "continuous" binary value: 1 on the holiday itself, with a smooth
    # ramp 0->1 on the day before the holiday and 1->0 on the day after.
    self.holidayEncoder = None
    if holiday != 0:
        self.holidayEncoder = ScalarEncoder(w=holiday, minval=0, maxval=1,
                                            periodic=False, radius=1,
                                            name="holiday")
        self.holidayOffset = register("holiday", self.holidayEncoder)

    # --- Time of day ------------------------------------------------------
    # The value is the time of day in hours. The default radius of 4 hours
    # corresponds to e.g. morning, afternoon, evening, early night,
    # late night, etc.
    self.timeOfDayEncoder = None
    if timeOfDay != 0:
        bits, spread = widthAndRadius(timeOfDay, 4)
        self.timeOfDayEncoder = ScalarEncoder(w=bits, minval=0, maxval=24,
                                              periodic=True, radius=spread,
                                              name="time of day")
        self.timeOfDayOffset = register("time of day",
                                        self.timeOfDayEncoder)