def computeBaseLine(self, baselineType=0):
    """Print and return the average "same as last week" baseline score.

    For each week from 2013-03-23 through 2013-04-20 (one-week steps), every
    song returned by ``db.getFeatureListByWeek`` is "predicted" to keep the
    popularity score it had the previous week.  Predictions and actual
    scores are converted with ``self.getRankArray`` and scored; the per-week
    scores are then averaged over the number of weeks evaluated.

    Args:
        baselineType: Selects the scoring function:
            0 -> ``self.getRankEvalationScore``,
            1 -> ``metrics.r2_score``,
            anything else -> ``metrics.mean_squared_error``.

    Returns:
        The mean per-week score (the same value that is printed).
    """
    firstWeek, finalWeek = datetime(2013, 3, 23), datetime(2013, 4, 20)
    oneWeek = timedelta(weeks=1)
    db = DBController()
    fg = FeatureGenerator()

    # Float accumulator so the final average never truncates under
    # Python 2 integer division.
    totalScore = 0.0
    weekCount = 0
    iterWeek = firstWeek
    while iterWeek <= finalWeek:
        lastWeek = iterWeek - oneWeek
        y_pred, y_test = [], []
        for featureVector in db.getFeatureListByWeek(iterWeek):
            lastWeekRank = db.getTop50Rank(lastWeek, featureVector["id"])
            # A song with no rank last week gets a score of 0.
            if lastWeekRank is None:
                lastWeekScore = 0
            else:
                lastWeekScore = fg.rankToPopScore(lastWeekRank)

            currentWeekRank = featureVector["rank"]
            # With no current rank, fall back to the predicted value so the
            # song contributes no error.
            if currentWeekRank is not None:
                currentWeekScore = fg.rankToPopScore(currentWeekRank)
            else:
                currentWeekScore = lastWeekScore

            y_pred.append(lastWeekScore)
            y_test.append(currentWeekScore)

        y_pred = self.getRankArray(numpy.asarray(y_pred))
        y_test = self.getRankArray(numpy.asarray(y_test))

        if baselineType == 0:
            totalScore += self.getRankEvalationScore(y_pred, y_test)
        elif baselineType == 1:
            # scikit-learn metrics take (y_true, y_pred); R^2 is not
            # symmetric, so the ground truth must come first.
            totalScore += metrics.r2_score(y_test, y_pred)
        else:
            totalScore += metrics.mean_squared_error(y_test, y_pred)

        weekCount += 1
        iterWeek += oneWeek

    # Average over the weeks actually evaluated instead of a hard-coded 5.
    baselineScore = totalScore / weekCount
    print(baselineScore)
    return baselineScore
def getFeatureMatrix(self, beginWeek, endWeek=None, mode=0, withSongId=False):
    """Build a feature matrix from the weekly feature lists in a date range.

    Args:
        beginWeek: First date of the range; must not be earlier than
            2007-01-01.
        endWeek: Last date of the range; must not be in the future.
            Defaults to the current time, resolved on every call.
        mode: Passed through unchanged to ``self.featureDictToList``.
        withSongId: Passed through unchanged to ``self.featureDictToList``.

    Returns:
        A ``numpy.matrix`` with one row per feature vector for which
        ``self.featureDictToList`` returned a non-None value.

    Raises:
        Exception: If ``beginWeek`` is before 2007-01-01 or ``endWeek`` is
            after the current time.
    """
    # Take a single snapshot of "now".  A ``datetime.today()`` default in
    # the signature would be evaluated only once, at definition time, and
    # go stale in a long-running process.
    now = datetime.today()
    if endWeek is None:
        endWeek = now

    if beginWeek < datetime(2007, 1, 1) or endWeek > now:
        raise Exception('Invalid input date!')

    beginWeek, endWeek = dateToSaturday(beginWeek), dateToSaturday(endWeek)
    # dateToSaturday may move endWeek past the present (presumably by
    # rounding forward -- verify against its definition); if so, step back
    # one week.
    if endWeek > now:
        endWeek = endWeek - timedelta(days=7)

    db = DBController()
    oneWeek = timedelta(weeks=1)
    rows = []
    iterWeek = beginWeek
    while iterWeek <= endWeek:
        for featureDict in db.getFeatureListByWeek(iterWeek):
            featureVector = self.featureDictToList(featureDict, mode, withSongId)
            # Skip entries that featureDictToList rejected by returning None.
            if featureVector is not None:
                rows.append(featureVector)
        iterWeek += oneWeek

    return numpy.matrix(rows)