def applyAggregationModel(testReviews, featureAvgSent, model, busImportantFeatures, userImportantFeatures):
    """Add a business-based aggregated rating prediction to every test review.

    For each review, the sentiments stored for the review's business in
    ``busImportantFeatures`` are filtered down to the aspects listed in the
    review's ``'exPredFeatures'``, encoded with ``encodeAspects1features`` and
    passed to ``model.predict``.  The result is stored in
    ``review['rating_prediction']['aggregBUSavg']``.

    Parameters:
        testReviews: sequence of review dicts; each must provide
            ``'exPredFeatures'`` and ``'business_id'``.  Reviews are modified
            in place.
        featureAvgSent: forwarded unchanged to ``encodeAspects1features``.
        model: object exposing ``predict(features)``.
        busImportantFeatures: mapping business id -> dict with a
            ``'sentiment'`` entry (aspect -> indexable pair).
        userImportantFeatures: not used by this function; kept so existing
            callers keep working.

    Returns:
        The same ``testReviews`` object, with predictions attached.
    """
    logger = logging.getLogger('signature.aAM.applyAggregationModel')
    logger.info('starting applyAggregationModel from %d reviews', len(testReviews))
    fsw = featureStructureWorker()

    for r, review in enumerate(testReviews):
        # The earlier revision also encoded review['predSentiments'] here, but
        # that vector was overwritten before it was ever used, so the wasted
        # per-review encoding has been dropped.
        predictedFeatures = review['exPredFeatures']

        # Predicted Features, Predicted Sentiments by BUSINESS
        busID = review['business_id']
        if busID in busImportantFeatures:
            busSents = busImportantFeatures[busID]['sentiment']
        else:
            # Unknown business: nothing to aggregate, encode an empty mapping.
            busSents = {}

        # Keep an aspect only when
        #   * the review's predicted features contain it with element [1] == 1
        #     (presumably a "predicted present" flag -- TODO confirm), and
        #   * the business entry's element [1] is > 1
        #     (presumably a support count -- TODO confirm).
        # The value kept is element [0] of the business entry.
        testData = {
            a: busSents[a][0]
            for a in busSents
            if a in predictedFeatures
            and predictedFeatures[a][1] == 1
            and busSents[a][1] > 1
        }
        features = encodeAspects1features(fsw, testData, featureAvgSent)
        aggregationBUS = model.predict(features)

        # Preserve any predictions already attached to the review.
        review.setdefault('rating_prediction', {})['aggregBUSavg'] = aggregationBUS

        # Progress heartbeat (also fires for the very first review, r == 0).
        if r % 10000 == 0:
            logger.debug('%d reviews processed', r)

    return testReviews
def learnAggregationModelsCV(trainReviews, featureAvgSent, busImportantFeatures, path):
    """Fit a Ridge rating model on business-averaged aspect sentiments.

    Every aspect found in a training review has its sentiment replaced by the
    business-level value from ``busImportantFeatures`` when that business has
    an entry for the aspect whose element ``[1]`` exceeds 5; otherwise the
    value from ``featureAvgSent`` is used.  The encoded vectors are used to
    pick a Ridge ``alpha`` by 10-fold cross-validation, after which a final
    model is fit on all the data.

    NOTE(review): this file defines ``learnAggregationModelsCV`` a second time
    further down with a three-argument signature.  The later definition
    replaces this one at import time, so this variant cannot be reached by
    name unless that clash is resolved.

    Parameters:
        trainReviews: sequence of review dicts providing ``'features'``,
            ``'stars'`` and ``'business_id'``.
        featureAvgSent: mapping aspect -> fallback sentiment; also forwarded
            to ``encodeAspects1features``.
        busImportantFeatures: mapping business id -> dict with a
            ``'sentiment'`` entry (aspect -> indexable pair).
        path: not used by this function; kept for interface compatibility.

    Returns:
        The ``linear_model.Ridge`` instance fit on all training data with the
        selected regularization strength.
    """
    logger = logging.getLogger("signature.lAMCV.learnAggregationModelsCV")
    logger.info("starting learnAggregationModel from %d reviews", len(trainReviews))
    fsw = featureStructureWorker()

    learnData = list()
    learnLabels = list()
    for review in trainReviews:
        reviewFeatures = fsw.getReviewFeaturesSentiment(review["features"])
        rating = review["stars"]

        # Resolve the business sentiment table once per review instead of
        # repeating the nested lookup for every aspect.
        busID = review["business_id"]
        if busID in busImportantFeatures:
            busSents = busImportantFeatures[busID]["sentiment"]
        else:
            busSents = {}

        # Only values of existing keys are reassigned, so iterating the
        # mapping while updating it is safe.
        for aspect in reviewFeatures:
            # NOTE(review): applyAggregationModel in this file filters the
            # same field with "> 1"; confirm the different threshold (5) is
            # intended.
            if aspect in busSents and busSents[aspect][1] > 5:
                reviewFeatures[aspect] = busSents[aspect][0]
            else:
                reviewFeatures[aspect] = featureAvgSent[aspect]

        # features = encodeAspects2features(fsw, reviewFeatures)
        features = encodeAspects1features(fsw, reviewFeatures, featureAvgSent)
        learnData.append(features)
        learnLabels.append(rating)

    learnData = np.array(learnData)
    learnLabels = np.array(learnLabels)

    regCandidates = [0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 5.0, 10, 15, 50, 100, 200, 500]
    # Start from -inf / the first candidate.  The regressor's score (R^2 in
    # scikit-learn) can be negative; with the previous 0.0 starting values a
    # run where every candidate scored <= 0 silently selected alpha = 0.0,
    # which is not part of the search grid.
    bestRes = float("-inf")
    bestReg = regCandidates[0]
    for reg in regCandidates:
        kf = cross_validation.KFold(len(learnLabels), n_folds=10)
        results = list()
        for train_index, test_index in kf:
            X_train, X_test = learnData[train_index], learnData[test_index]
            y_train, y_test = learnLabels[train_index], learnLabels[test_index]
            clf = linear_model.Ridge(alpha=reg)
            clf.fit(X_train, y_train)
            results.append(clf.score(X_test, y_test))
        avgScore = np.average(results)
        # Strict ">" keeps the smallest alpha among tied candidates.
        if avgScore > bestRes:
            bestRes = avgScore
            bestReg = reg
    logger.info("Best score %f with regularization = %.2f", bestRes, bestReg)

    # Refit on the full training set with the selected regularization.
    clf = linear_model.Ridge(alpha=bestReg)
    clf.fit(learnData, learnLabels)

    return clf
def learnAggregationModelsCV(trainReviews, featureAvgSent, path):
    """Fit a Ridge rating model on per-review aspect sentiments.

    Each training review's ``'features'`` are turned into aspect sentiments by
    ``featureStructureWorker.getReviewFeaturesSentiment`` and encoded with
    ``encodeAspects1features``; the review's ``'stars'`` is the target.  The
    Ridge ``alpha`` is picked by 10-fold cross-validation over a fixed grid,
    after which a final model is fit on all the data.

    NOTE(review): an earlier definition with the same name (taking an extra
    ``busImportantFeatures`` argument) appears above in this file; this later
    definition replaces it at import time.

    Parameters:
        trainReviews: sequence of review dicts providing ``'features'`` and
            ``'stars'``.
        featureAvgSent: forwarded unchanged to ``encodeAspects1features``.
        path: not used by this function; kept for interface compatibility.

    Returns:
        The ``linear_model.Ridge`` instance fit on all training data with the
        selected regularization strength.
    """
    logger = logging.getLogger('signature.lAMCV.learnAggregationModelsCV')
    logger.info('starting learnAggregationModel from %d reviews', len(trainReviews))
    fsw = featureStructureWorker()

    learnData = list()
    learnLabels = list()
    for review in trainReviews:
        reviewFeatures = fsw.getReviewFeaturesSentiment(review['features'])
        #features = encodeAspects2features(fsw, reviewFeatures)
        learnData.append(encodeAspects1features(fsw, reviewFeatures, featureAvgSent))
        learnLabels.append(review['stars'])

    learnData = np.array(learnData)
    learnLabels = np.array(learnLabels)

    regCandidates = [0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 5.0, 10, 15, 50, 100, 200, 500]
    # Start from -inf / the first candidate.  The regressor's score (R^2 in
    # scikit-learn) can be negative; with the previous 0.0 starting values a
    # run where every candidate scored <= 0 silently selected alpha = 0.0,
    # which is not part of the search grid.
    bestRes = float('-inf')
    bestReg = regCandidates[0]
    for reg in regCandidates:
        kf = cross_validation.KFold(len(learnLabels), n_folds=10)
        results = list()
        for train_index, test_index in kf:
            X_train, X_test = learnData[train_index], learnData[test_index]
            y_train, y_test = learnLabels[train_index], learnLabels[test_index]
            clf = linear_model.Ridge(alpha=reg)
            clf.fit(X_train, y_train)
            results.append(clf.score(X_test, y_test))
        avgScore = np.average(results)
        # Strict ">" keeps the smallest alpha among tied candidates.
        if avgScore > bestRes:
            bestRes = avgScore
            bestReg = reg
    logger.info('Best score %f with regularization = %.2f', bestRes, bestReg)

    # Refit on the full training set with the selected regularization.
    clf = linear_model.Ridge(alpha=bestReg)
    clf.fit(learnData, learnLabels)

    return clf