def computeAreaDensity(zCoord, trainTags, focusPoints, testTimes):
    """
    Compute the summed density distribution of all users for the focus
    points specified.

    Test cases are generated for the requested level, a GP prediction is run
    for every user, and the per-user densities are added together point by
    point for each timestamp.

    Note: This only needs to be run once per run, no need to do bayes opt,
    so maybe we can preprocess and store the result in a file?
    --
    zCoord: level, forwarded as `level` to generateTestCases and
        computeDensity
    trainTags: list of user tags
    focusPoints: list of focus points [(a, b), (c, d), (e, f)] where a
        through f are floats
    testTimes: forwarded unchanged to bayes.predictGP
    --
    Returns:
        Tuple (densityDist, numOfSkippedFile).
        densityDist is {timestamp: densityDistribution} as produced by
        computeDensity, accumulated over all users (None when there are no
        users). numOfSkippedFile is currently always 0.
    """
    print('Generating test cases and computing shortest paths...')
    dfFloor = generateTestCases(focusPoints, trainTags, level=zCoord)
    dfFormatted = formatDf(dfFloor)
    uniqueUserID = dfFormatted['USER'].unique()
    shortestPaths = dfFormatted['SHORTEST_PATHS']
    dfFormatted['SHORTEST_PATHS'] = shortestPaths.str.split(',')

    densityDist = None
    numOfSkippedFile = 0
    numUsers = len(uniqueUserID)

    for count, user in enumerate(uniqueUserID):
        print('Computing GP %s of %s . USER== %s' % (count, numUsers, user))
        userRows = dfFormatted[dfFormatted['USER'] == user]
        userResult = bayes.predictGP(userRows, testTimes)
        userDensityDist = computeDensity(
            userResult, focusPoints, level=zCoord)

        if densityDist is None:
            # The first user's distribution seeds the accumulator.
            densityDist = userDensityDist
            continue

        # All users are expected to share the same timestamps.
        for timestamp in userDensityDist:
            userDist = userDensityDist[timestamp]
            totalDist = densityDist[timestamp]
            for point in userDist.getPoints():
                accumulated = totalDist.query(point)
                if np.isnan(accumulated):
                    print('NAN DETECTED')
                    # Treat a NaN accumulator as empty so it does not
                    # poison the sum for this point.
                    accumulated = 0
                totalDist.setPoint(point,
                                   accumulated + userDist.query(point))

    return densityDist, numOfSkippedFile
def runBayes(df, testTimes):
    """
    Retired entry point: reports that it is not being used and exits.

    The GP regression/prediction that used to follow (splitting the
    'SHORTEST_PATHS' column and calling bayes.predictGP for the first user)
    sat behind an unconditional exit and could never execute, so only the
    guard remains.
    ---
    df: unused; dataframe output from formatDf method
        ['TIMESTAMP', 'USER', 'Z', 'SHORTEST_PATHS']
    testTimes: unused
    ---
    Raises:
        SystemExit: always, immediately after printing 'NOT BEING USED'.
    """
    print('NOT BEING USED')
    # Raise SystemExit directly instead of calling quit(): quit() is a helper
    # the `site` module installs for interactive sessions and is not
    # available when the interpreter runs without `site` (python -S).
    raise SystemExit
def runBayes(df, testTimes):
    """
    Retired entry point: reports that it is not being used and exits.

    The GP regression/prediction that used to follow (splitting the
    'SHORTEST_PATHS' column and calling bayes.predictGP for the first user)
    sat behind an unconditional exit and could never execute, so only the
    guard remains.
    ---
    df: unused; dataframe output from formatDf method
        ['TIMESTAMP', 'USER', 'Z', 'SHORTEST_PATHS']
    testTimes: unused
    ---
    Raises:
        SystemExit: always, immediately after printing 'NOT BEING USED'.
    """
    print('NOT BEING USED')
    # Raise SystemExit directly instead of calling quit(): quit() is a helper
    # the `site` module installs for interactive sessions and is not
    # available when the interpreter runs without `site` (python -S).
    raise SystemExit
def computeAreaDensity(zCoord, trainTags, focusPoints, testTimes):
    """
    Compute the summed density distribution of all users for the focus
    points specified.

    Test cases are generated for the requested level, a GP prediction is run
    for every user, and the per-user densities are added together point by
    point for each timestamp.

    Note: This only needs to be run once per run, no need to do bayes opt,
    so maybe we can preprocess and store the result in a file?
    --
    zCoord: level, forwarded as `level` to generateTestCases and
        computeDensity
    trainTags: list of user tags
    focusPoints: list of focus points [(a, b), (c, d), (e, f)] where a
        through f are floats
    testTimes: forwarded unchanged to bayes.predictGP
    --
    Returns:
        Tuple (densityDist, numOfSkippedFile).
        densityDist is {timestamp: densityDistribution} as produced by
        computeDensity, accumulated over all users (None when there are no
        users). numOfSkippedFile is currently always 0.
    """
    print('Generating test cases and computing shortest paths...')
    dfFloor = generateTestCases(focusPoints, trainTags, level=zCoord)
    dfFormatted = formatDf(dfFloor)
    uniqueUserID = dfFormatted['USER'].unique()
    shortestPaths = dfFormatted['SHORTEST_PATHS']
    dfFormatted['SHORTEST_PATHS'] = shortestPaths.str.split(',')

    densityDist = None
    numOfSkippedFile = 0
    numUsers = len(uniqueUserID)

    for count, user in enumerate(uniqueUserID):
        print('Computing GP %s of %s . USER== %s' % (count, numUsers, user))
        userRows = dfFormatted[dfFormatted['USER'] == user]
        userResult = bayes.predictGP(userRows, testTimes)
        userDensityDist = computeDensity(
            userResult, focusPoints, level=zCoord)

        if densityDist is None:
            # The first user's distribution seeds the accumulator.
            densityDist = userDensityDist
            continue

        # All users are expected to share the same timestamps.
        for timestamp in userDensityDist:
            userDist = userDensityDist[timestamp]
            totalDist = densityDist[timestamp]
            for point in userDist.getPoints():
                accumulated = totalDist.query(point)
                if np.isnan(accumulated):
                    print('NAN DETECTED')
                    # Treat a NaN accumulator as empty so it does not
                    # poison the sum for this point.
                    accumulated = 0
                totalDist.setPoint(point,
                                   accumulated + userDist.query(point))

    return densityDist, numOfSkippedFile