def processFileNoLatLon(self, folder, file, fileColCnt, fileNorm):
    """Copy the usable rows of ``file`` into ``fileNorm``.

    Each line of ``file`` is treated as a tab-separated row.  A row is
    dropped when its column count differs from ``fileColCnt`` or when
    ``self.isNoLatLon`` returns true for columns 1 and 2.  Surviving rows
    are written unchanged and in their original order.

    ``folder`` is accepted but not used by this method.
    """
    InOut.console_func_begin("processFileNoLatLon")
    with open(file, "r") as src:
        rawLines = src.readlines()

    keptRows = []
    for rawLine in rawLines:
        row = rawLine.strip("\n")
        cols = row.split("\t")
        if len(cols) != fileColCnt:
            continue
        # The id in column 0 is parsed only so that a non-integer id still
        # raises ValueError; the parsed value itself is not needed.
        int(cols[0])
        if not self.isNoLatLon(cols[1], cols[2]):
            keptRows.append(row)

    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, fileNorm)
def processFileCheckInNoLatLon(self):
    """Write the normalised check-in file, back-filling coordinates.

    Reads the parsed check-in file as tab-separated rows.  A row is
    skipped when its column count differs from
    ``Config.fileCheckinColNum`` or when its venue id (column 2) is not a
    key of the dict returned by ``Venue.loadDictVenueFromNormFile()``.
    When ``self.isNoLatLon`` is true for columns 3 and 4, those columns
    are replaced by the venue's ``latitude`` / ``longitude`` formatted
    with ``"%f"``.  The result is written to the ``Config.fileNorm``
    prefixed check-in file.
    """
    InOut.console_func_begin("processFileCheckInNoLatLon")
    dictVenue = Venue.loadDictVenueFromNormFile()
    folder = join(Config.folderData, Config.folderDataParsed)
    fileCheckIn = join(folder, Config.fileCheckin)
    fileCheckinNorm = join(folder, Config.fileNorm + Config.fileCheckin)

    with open(fileCheckIn, "r") as fin:
        lines = fin.readlines()

    lineListNew = []
    for line in lines:
        fields = line.strip("\n").split("\t")
        if len(fields) != Config.fileCheckinColNum:
            continue
        venueId = int(fields[2])
        # Membership test instead of the deprecated dict.has_key().
        if venueId not in dictVenue:
            continue
        if self.isNoLatLon(fields[3], fields[4]):
            venue = dictVenue[venueId]
            fields[3] = "%f" % venue.latitude
            fields[4] = "%f" % venue.longitude
        # NOTE(review): the original indentation was lost; this assumes every
        # valid row is written and only the coordinate substitution is
        # conditional -- confirm against the original file.
        lineListNew.append("\t".join(fields))

    FileTool.FileTool.WriteStrListToFileWithNewLine(
        lineListNew, fileCheckinNorm)
def loadDictVenueFromNormFile(cls, file=""):
    """Load a tab-separated venue file into a dict keyed by venue id.

    :param file: path of the venue file; when empty, the
        ``Config.fileNorm`` prefixed venue file under the parsed-data
        folder is used.
    :returns: dict mapping ``int`` venue id to a ``Venue`` built from the
        id (column 0) and the float values of columns 1 and 2.
    :raises ValueError: if a row with the expected column count holds a
        non-numeric id or coordinate.

    Rows whose column count differs from ``Config.fileVenueColNum`` are
    skipped.  When an id occurs more than once the first row wins.
    """
    InOut.console_func_begin("Venue loadDictVenueFromNormFile")
    if file == "":
        file = join(Config.folderData, Config.folderDataParsed,
                    Config.fileNorm + Config.fileVenue)

    dictVenue = {}
    with open(file, "r") as fin:
        for line in fin:
            fields = line.strip("\n").split("\t")
            if len(fields) != Config.fileVenueColNum:
                continue
            # All three columns are parsed before the duplicate check so a
            # malformed duplicate row still raises.
            venueId = int(fields[0])
            lat = float(fields[1])
            lon = float(fields[2])
            if venueId not in dictVenue:
                dictVenue[venueId] = Venue(venueId, lat, lon)
    return dictVenue
def filt_by_lat_lon_socialgraph(self):
    """Filter the social-graph file down to edges between kept users.

    A line is kept when it splits into exactly two tab-separated fields
    and both, converted with ``Venue.intStrToInt``, are members of the
    set returned by ``self.load_filt_user_set()``.  Kept lines are
    written to the ``Config.fileFilt`` prefixed social-graph file.  A
    ``done/total`` progress line is printed every 10000 input lines.
    """
    InOut.console_func_begin("filt_by_lat_lon_socialgraph")
    setUser = self.load_filt_user_set()
    srcPath = join(Config.folderData, Config.folderDataParsed,
                   Config.fileSocialGraph)
    dstPath = join(Config.folderData, Config.folderDataParsed,
                   Config.fileFilt + Config.fileSocialGraph)

    edges = FileTool.FileTool.ReadLineListFromFile(srcPath)
    keptEdges = []
    for position, edge in enumerate(edges, 1):
        pair = edge.split("\t")
        if len(pair) == 2:
            u1 = Venue.intStrToInt(pair[0])
            u2 = Venue.intStrToInt(pair[1])
            if u1 in setUser and u2 in setUser:
                keptEdges.append(edge)
        # NOTE(review): the original indentation was lost; the progress
        # report is assumed to sit at loop level -- confirm.
        if position % 10000 == 0:
            progress = "%s/%s" % (position, len(edges))
            print(progress)

    FileTool.FileTool.WriteStrListToFileWithNewLine(keptEdges, dstPath)
def loadDictVenueFromNormFile(cls, file=""):
    """Build a ``venue id -> Venue`` dict from a tab-separated venue file.

    :param file: path of the venue file; an empty string selects the
        ``Config.fileNorm`` prefixed venue file in the parsed-data folder.
    :returns: dict keyed by the integer id found in column 0; each value
        is ``Venue(id, float(column 1), float(column 2))``.
    :raises ValueError: if a row with the expected column count contains
        a non-numeric id or coordinate.

    Rows with a column count other than ``Config.fileVenueColNum`` are
    ignored, and the first row seen for an id is the one that is kept.
    """
    InOut.console_func_begin("Venue loadDictVenueFromNormFile")
    if file == "":
        file = join(Config.folderData, Config.folderDataParsed,
                    Config.fileNorm + Config.fileVenue)

    dictVenue = {}
    with open(file, "r") as fin:
        for line in fin:
            cols = line.strip("\n").split("\t")
            if len(cols) != Config.fileVenueColNum:
                continue
            # Parse everything first: malformed rows must raise even when
            # their id has already been stored.
            venueId = int(cols[0])
            lat = float(cols[1])
            lon = float(cols[2])
            if venueId not in dictVenue:
                dictVenue[venueId] = Venue(venueId, lat, lon)
    return dictVenue
def processFileNoLatLon(self, folder, file, fileColCnt, fileNorm):
    """Filter ``file`` into ``fileNorm``, dropping unusable rows.

    Lines are split on tabs.  Rows whose column count is not
    ``fileColCnt`` are dropped, as are rows for which
    ``self.isNoLatLon(column 1, column 2)`` is true.  The remaining lines
    are written as they were read (minus the newline), in the same order.

    ``folder`` is part of the signature but is not used here.
    """
    InOut.console_func_begin("processFileNoLatLon")
    with open(file, "r") as src:
        rawLines = src.readlines()

    keptRows = []
    for rawLine in rawLines:
        row = rawLine.strip("\n")
        cols = row.split("\t")
        if len(cols) != fileColCnt:
            continue
        # Kept purely for its side effect: a non-integer id in column 0
        # raises ValueError.
        int(cols[0])
        if self.isNoLatLon(cols[1], cols[2]):
            continue
        keptRows.append(row)

    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, fileNorm)
def processFileVenueNoLatLon(self):
    """Run ``processFileNoLatLon`` on the parsed venue file.

    Input is ``Config.fileVenue`` in the parsed-data folder; output is the
    same name prefixed with ``Config.fileNorm``, in the same folder.
    """
    InOut.console_func_begin("processFileVenueNoLatLon")
    parsedDir = join(Config.folderData, Config.folderDataParsed)
    venuePath = join(parsedDir, Config.fileVenue)
    venueNormPath = join(parsedDir, Config.fileNorm + Config.fileVenue)
    self.processFileNoLatLon(
        parsedDir, venuePath, Config.fileVenueColNum, venueNormPath)
def start(self):
    """Load the city dictionary into ``self.dictCity`` and run
    ``self.checkVenueInDictCity()``.
    """
    InOut.console_func_begin("AnalyseVenue start")
    cityDict = GeoMap.load_city_dict()
    self.dictCity = cityDict
    # Steps that were disabled in the original, kept for reference:
    #   self.listVenue = self.loadVenueFromFileNorm()
    #   self.statisVenue()
    self.checkVenueInDictCity()
def processFileCheckInNoLatLon(self):
    """Produce the normalised check-in file, filling in missing coordinates.

    The parsed check-in file is read as tab-separated rows.  Rows with a
    column count other than ``Config.fileCheckinColNum`` are skipped, and
    so are rows whose venue id (column 2) is not in the dict returned by
    ``Venue.loadDictVenueFromNormFile()``.  For rows where
    ``self.isNoLatLon(column 3, column 4)`` is true, both columns are
    overwritten with the venue's ``latitude`` and ``longitude`` rendered
    through ``"%f"``.  Output goes to the ``Config.fileNorm`` prefixed
    check-in file.
    """
    InOut.console_func_begin("processFileCheckInNoLatLon")
    dictVenue = Venue.loadDictVenueFromNormFile()
    folder = join(Config.folderData, Config.folderDataParsed)
    fileCheckIn = join(folder, Config.fileCheckin)
    fileCheckinNorm = join(folder, Config.fileNorm + Config.fileCheckin)

    with open(fileCheckIn, "r") as fin:
        lines = fin.readlines()

    lineListNew = []
    for line in lines:
        cols = line.strip("\n").split("\t")
        if len(cols) != Config.fileCheckinColNum:
            continue
        venueId = int(cols[2])
        # ``in`` is used instead of dict.has_key(), which is deprecated.
        if venueId not in dictVenue:
            continue
        if self.isNoLatLon(cols[3], cols[4]):
            known = dictVenue[venueId]
            cols[3] = "%f" % known.latitude
            cols[4] = "%f" % known.longitude
        # NOTE(review): the original indentation was lost; rows that already
        # carry coordinates are assumed to be written too -- confirm.
        lineListNew.append("\t".join(cols))

    FileTool.FileTool.WriteStrListToFileWithNewLine(
        lineListNew, fileCheckinNorm)
def load_dict_checkuser(cls, flagExp=True):
    """Return the dict loaded by ``User.loadDictUserFromFile`` from the
    ``Config.fileFlagCheck`` prefixed user file in the experiment folder.

    When ``flagExp`` is false nothing is read and an empty dict is
    returned.
    """
    InOut.console_func_begin("load_dict_checkuser")
    # NOTE(review): the original indentation was lost; the load is assumed
    # to happen only when flagExp is set (the path is undefined otherwise).
    if not flagExp:
        return {}
    checkUserFile = join(Config.folderData, Config.folderDataParsed,
                         Config.folderExp,
                         Config.fileFlagCheck + Config.fileUser)
    return User.loadDictUserFromFile(checkUserFile)
def load_dict_user(cls):
    """Load the user dict from the user file in the experiment/ratio folder.

    The value is whatever ``User.loadDictUserFromFile`` returns; the
    original author's note describes it as ``user_id => user``.
    """
    InOut.console_func_begin("load_dict_user")
    userFile = join(Config.folderData, Config.folderDataParsed,
                    Config.folderExp, Config.folderRatio, Config.fileUser)
    return User.loadDictUserFromFile(userFile)
def processFileVenueNoLatLon(self):
    """Apply ``processFileNoLatLon`` to the venue file.

    Reads ``Config.fileVenue`` from the parsed-data folder and writes the
    ``Config.fileNorm`` prefixed file next to it, using
    ``Config.fileVenueColNum`` as the expected column count.
    """
    InOut.console_func_begin("processFileVenueNoLatLon")
    parsedDir = join(Config.folderData, Config.folderDataParsed)
    source = join(parsedDir, Config.fileVenue)
    target = join(parsedDir, Config.fileNorm + Config.fileVenue)
    self.processFileNoLatLon(
        parsedDir, source, Config.fileVenueColNum, target)
def load_dict_user_city(cls, flagExp=True, flagRatio=True):
    """Load the user-city mapping from the experiment data.

    :param flagExp: select ``Config.fileUserCity`` in the experiment
        folder.
    :param flagRatio: select ``Config.fileUserCity`` in the ratio
        sub-folder of the experiment folder; takes precedence over
        ``flagExp`` when both are set.
    :returns: the result of ``FileTool.ReadFileDictStrStr(file, 0, 1)``,
        or an empty dict when neither flag selects a file.
    """
    InOut.console_func_begin("load_dict_user_city")
    # NOTE(review): the original indentation was lost; the two flag checks
    # are assumed to be sequential rather than nested -- confirm.
    file = None
    if flagExp:
        file = join(Config.folderData, Config.folderDataParsed,
                    Config.folderExp, Config.fileUserCity)
    if flagRatio:
        file = join(Config.folderData, Config.folderDataParsed,
                    Config.folderExp, Config.folderRatio,
                    Config.fileUserCity)
    if file is None:
        # Previously this path reached the read with ``file`` unbound and
        # raised UnboundLocalError; return the empty default instead.
        return {}
    return FileTool.ReadFileDictStrStr(file, 0, 1)
def filtByLatLon(self):
    """Run the latitude/longitude filter steps that are currently enabled.

    Only ``filt_by_lat_lon_user`` is called.
    """
    InOut.console_func_begin("filtByLatLon")
    self.filt_by_lat_lon_user()
    # Steps that were disabled in the original, kept for reference:
    #   self.filt_by_lat_lon_venue()
    #   self.filt_by_lat_lon_socialgraph()
    #   self.filt_by_lat_lon_checkin()
    #   self.filt_by_lat_lon_rating()
def start(self):
    """Log the begin and end of the "PreProcess" stage.

    Every preprocessing step is currently disabled, so nothing else runs.
    """
    InOut.console_func_begin("PreProcess")
    # Steps that were disabled in the original, kept for reference:
    #   self.proDbCheckInNoLatLon()
    #   self.processFileFormat()
    #   self.processNoLatLon()
    #   self.filtByLatLon()
    InOut.console_func_end("PreProcess")
def load_dict_user_loc(cls):
    """Return a dict with the keys of ``LoadData.load_dict_user()`` and,
    for each, a ``Location`` built from that user's ``latitude`` and
    ``longitude``.
    """
    InOut.console_func_begin("load_dict_user_loc")
    users = LoadData.load_dict_user()
    locations = {}
    for user_id, user in users.items():
        locations[user_id] = Location(user.latitude, user.longitude)
    return locations
def start(self):
    """Construct the four analysers and start the friend analysis.

    ``AnalyseVenue``, ``AnalyseUser`` and ``AnalyseCheckin`` are
    instantiated but their ``start()`` calls are disabled; only
    ``AnalyseFriend.start()`` runs.
    """
    InOut.console_func_begin("Analyse start")
    # The unused instances are still created (and kept alive until return)
    # in case their constructors matter.
    venueStage = AnalyseVenue()      # venueStage.start() is disabled
    userStage = AnalyseUser()        # userStage.start() is disabled
    checkinStage = AnalyseCheckin()  # checkinStage.start() is disabled
    friendStage = AnalyseFriend()
    friendStage.start()
def filt_by_lat_lon_checkin(self):
    """Filter the normalised check-in file by kept users and venues.

    A line is kept when column 1 and column 2, each converted with
    ``Venue.floatStrToInt``, are members of ``self.load_filt_user_set()``
    and ``self.load_filt_venue_set()`` respectively.  Kept lines go to the
    ``Config.fileFilt`` prefixed check-in file.
    """
    InOut.console_func_begin("filt_by_lat_lon_checkin")
    setUser = self.load_filt_user_set()
    setVenue = self.load_filt_venue_set()
    srcPath = join(Config.folderData, Config.folderDataParsed,
                   Config.fileNorm + Config.fileCheckin)
    dstPath = join(Config.folderData, Config.folderDataParsed,
                   Config.fileFilt + Config.fileCheckin)

    def isKept(row):
        # Both ids are converted before either membership test.
        cols = row.split("\t")
        uid = Venue.floatStrToInt(cols[1])
        vid = Venue.floatStrToInt(cols[2])
        return (uid in setUser) and (vid in setVenue)

    rows = FileTool.FileTool.ReadLineListFromFile(srcPath)
    keptRows = [row for row in rows if isKept(row)]
    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, dstPath)
def processFileFormat(self):
    """Convert every file of the original-data folder into the parsed folder.

    For each directory entry a "Process ..." line is printed.  Entries
    whose name starts with "." are then skipped.  The others are read with
    ``FileTool.ReadLineListFromFile(path, 2)``, passed through
    ``FileTool.ReplaceLineListSplitNorm(lines, "|", "\\t")`` and written
    under the same name in the parsed-data folder.
    """
    InOut.console_func_begin("processFileFormat")
    srcFolder = join(Config.folderData, Config.folderDataOri)
    dstFolder = join(Config.folderData, Config.folderDataParsed)
    for entry in listdir(srcFolder):
        # The message is printed before the hidden-file check, as before.
        print("Process %s..." % entry)
        if entry.startswith('.'):
            continue
        # NOTE(review): the meaning of the second argument (2) is defined by
        # FileTool and is not visible here.
        sourceLines = FileTool.ReadLineListFromFile(join(srcFolder, entry), 2)
        targetPath = join(dstFolder, entry)
        converted = FileTool.ReplaceLineListSplitNorm(sourceLines, "|", "\t")
        FileTool.WriteStrListToFileWithNewLine(converted, targetPath)
def checkVenueInDictCity(self):
    """Assign a city id to every filtered venue and write the result.

    Loads ``self.listVenue`` via ``self.loadVenueFromFileFilt()`` and, for
    each venue, looks up a city with
    ``Location.getLocCity(Location(lat, lon), self.dictCity)``.  One
    tab-separated line ``venueId, latitude, longitude, cityId`` is
    produced per venue; ``cityId`` is ``-1`` when no city was found.  The
    lines are written to the ``Config.fileCity`` prefixed venue file.

    Progress is printed every 1000 venues, followed by a final
    ``No: <unmatched>/<total>`` summary.  Requires ``self.dictCity`` to be
    set beforehand.
    """
    InOut.console_func_begin("checkVenueInDictCity")
    self.listVenue = self.loadVenueFromFileFilt()
    fileVenueCity = join(Config.folderData, Config.folderDataParsed,
                         Config.fileCity + Config.fileVenue)

    totalCnt = len(self.listVenue)
    cntNo = 0
    lineVenueCityList = []
    # One counter replaces the original's two identical ones (cnt, index).
    for cnt, venue in enumerate(self.listVenue, 1):
        loc = Location(venue.latitude, venue.longitude)
        city = Location.getLocCity(loc, self.dictCity)
        if city is None:
            cntNo += 1
            cityId = -1
        else:
            cityId = city.id
        lineVenueCityList.append("\t".join([
            str(venue.venueId),
            str(venue.latitude),
            str(venue.longitude),
            str(cityId),
        ]))
        if cnt % 1000 == 0:
            print("%s/%s\tNo:%s/%s" % (cnt, totalCnt, cntNo, cnt))

    # After the loop the processed count equals the list length.
    print("No: %s/%s" % (cntNo, totalCnt))
    FileTool.FileTool.WriteStrListToFileWithNewLine(
        lineVenueCityList, fileVenueCity)
def processFileFormat(self):
    """Rewrite each original data file into the parsed-data folder.

    Every entry of the original-data folder is announced with a
    "Process ..." line; names beginning with "." are then skipped.  The
    rest are loaded with ``FileTool.ReadLineListFromFile(path, 2)``,
    transformed by ``FileTool.ReplaceLineListSplitNorm(lines, "|", "\\t")``
    and saved under the same file name in the parsed-data folder.
    """
    InOut.console_func_begin("processFileFormat")
    srcFolder = join(Config.folderData, Config.folderDataOri)
    dstFolder = join(Config.folderData, Config.folderDataParsed)
    for name in listdir(srcFolder):
        # Announce first, then filter: hidden entries are still reported.
        print("Process %s..." % name)
        if name.startswith('.'):
            continue
        # NOTE(review): the meaning of the literal 2 is defined by FileTool
        # and cannot be determined from this method.
        rawLines = FileTool.ReadLineListFromFile(join(srcFolder, name), 2)
        outPath = join(dstFolder, name)
        tabbedLines = FileTool.ReplaceLineListSplitNorm(rawLines, "|", "\t")
        FileTool.WriteStrListToFileWithNewLine(tabbedLines, outPath)
def filt_by_lat_lon_checkin(self):
    """Write the check-ins whose user and venue both survived filtering.

    Reads the ``Config.fileNorm`` prefixed check-in file.  A line is kept
    when ``Venue.floatStrToInt`` of column 1 is in
    ``self.load_filt_user_set()`` and ``Venue.floatStrToInt`` of column 2
    is in ``self.load_filt_venue_set()``.  Output is the
    ``Config.fileFilt`` prefixed check-in file.
    """
    InOut.console_func_begin("filt_by_lat_lon_checkin")
    setUser = self.load_filt_user_set()
    setVenue = self.load_filt_venue_set()
    normPath = join(Config.folderData, Config.folderDataParsed,
                    Config.fileNorm + Config.fileCheckin)
    filtPath = join(Config.folderData, Config.folderDataParsed,
                    Config.fileFilt + Config.fileCheckin)

    keptRows = []
    for row in FileTool.FileTool.ReadLineListFromFile(normPath):
        cols = row.split("\t")
        uid = Venue.floatStrToInt(cols[1])
        vid = Venue.floatStrToInt(cols[2])
        if uid not in setUser or vid not in setVenue:
            continue
        keptRows.append(row)
    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, filtPath)
def start(cls):
    """Entry point of the processing pipeline.

    As written, only the plotting stage runs: a ``PreProcess`` instance is
    created (not started), ``Plot().start()`` is called, and the method
    returns immediately afterwards.
    """
    InOut.console_func_begin("Process")
    preStage = PreProcess()  # preStage.start() is disabled
    plotStage = Plot()
    plotStage.start()
    return

    # NOTE(review): nothing below is reachable because of the bare return
    # above, so console_func_end("Process") is never logged either.  This
    # looks like a manual switch -- confirm before removing either part.
    cityStage = City()          # cityStage.start() is disabled
    resourceStage = Resource()  # resourceStage.start() is disabled
    if Config.flag_exp_generate_exp_data:
        expData = GetExpData()
        expData.start()
    if Config.flag_exp_generate_ratio_data:
        expRatioData = GetExpRatioData()
        expRatioData.start()
    analyseStage = Analyse()    # analyseStage.start() is disabled
    # Test.test() is disabled
    homeLocStage = HomeLocIdentify()
    homeLocStage.start()
    InOut.console_func_end("Process")
def load_city_dict(cls):
    """Load the city file into a dict keyed by ``city.id``.

    Reads the ``Config.fileNorm`` prefixed city-by-population file from
    the resource folder and prints the number of lines read.  Each line
    with exactly 12 tab-separated fields is fed to ``City.set_value``;
    lines with any other field count are printed and skipped.
    """
    InOut.console_func_begin("load_city_dict")
    fileCity = join(Config.folderData, Config.folderResourse,
                    Config.fileNorm + Config.fileCityByPopulation)
    rows = FileTool.FileTool.ReadLineListFromFile(fileCity)
    print(len(rows))

    cityById = {}
    for row in rows:
        # Column order per the original author's note: no, name, state,
        # population, landarea, popdensity, lat, lon, swlat, swlon, nelt,
        # nelon
        fields = row.split("\t")
        if len(fields) != 12:
            print(row)
            continue
        city = City()
        city.set_value(fields)
        cityById[city.id] = city
    return cityById
def load_dict_user_checkin(cls, dictVenue, flagCheckinLocUseVenueLoc=False, flagExp=True):
    """Group the experiment check-ins by user id.

    :param dictVenue: venue dict, only forwarded to
        ``LoadData.replace_checkin_loc_use_venue``.
    :param flagCheckinLocUseVenueLoc: when true, the loaded check-in list
        is first replaced by the result of
        ``LoadData.replace_checkin_loc_use_venue(listCheckin, dictVenue)``.
    :param flagExp: accepted for interface compatibility; not used.
    :returns: dict mapping ``checkin.user_id`` to the list of that user's
        check-ins, in file order.

    The check-ins come from ``Config.fileCheckin`` in the experiment/ratio
    folder; their count is printed after loading.
    """
    InOut.console_func_begin("load_dict_user_checkin")
    fileCheckin = join(Config.folderData, Config.folderDataParsed,
                       Config.folderExp, Config.folderRatio,
                       Config.fileCheckin)
    listCheckin = Checkin.loadCheckinFromFile(fileCheckin)
    # Single formatted argument: same output as the comma-style print.
    print("listCheckin: %s" % len(listCheckin))
    if flagCheckinLocUseVenueLoc:
        listCheckin = LoadData.replace_checkin_loc_use_venue(
            listCheckin, dictVenue)

    dictUserCheckin = {}
    for checkin in listCheckin:
        dictUserCheckin.setdefault(checkin.user_id, []).append(checkin)
    return dictUserCheckin
def load_dict_user_rating(cls, dictVenue, flagExp=True):
    """Group the experiment ratings by user id, attaching venue coordinates.

    :param dictVenue: mapping from venue id to an object with
        ``latitude`` and ``longitude`` attributes.
    :param flagExp: accepted for interface compatibility; not used.
    :returns: dict mapping ``rating.user_id`` to the list of that user's
        ratings, in file order.

    Ratings come from ``Config.fileRating`` in the experiment/ratio
    folder; their count is printed after loading.  Ratings whose
    ``venue_id`` is not in ``dictVenue`` are dropped.  Each kept rating
    object is modified in place: its ``latitude`` / ``longitude`` are set
    from the venue.
    """
    InOut.console_func_begin("load_dict_user_rating")
    fileRating = join(Config.folderData, Config.folderDataParsed,
                      Config.folderExp, Config.folderRatio,
                      Config.fileRating)
    listRating = Rating.loadRatingFromFile(fileRating)
    # Single formatted argument: same output as the comma-style print.
    print("listRating: %s" % len(listRating))

    dictUserRating = {}
    for rating in listRating:
        vid = rating.venue_id
        if vid not in dictVenue:
            continue
        venue = dictVenue[vid]
        rating.latitude = venue.latitude
        rating.longitude = venue.longitude
        dictUserRating.setdefault(rating.user_id, []).append(rating)
    return dictUserRating
def filt_by_lat_lon_file(self, file, latCol, lonCol):
    """Filter a normalised file by the configured area.

    Reads the ``Config.fileNorm`` prefixed ``file`` from the parsed-data
    folder and keeps each line for which
    ``Venue.isInFiltArea(Config.filtCountry, lat, lon)`` compares equal to
    ``True``, where ``lat`` / ``lon`` are the tab-separated fields at
    indexes ``latCol`` / ``lonCol``.  Kept lines are written to the
    ``Config.fileFilt`` prefixed ``file``.
    """
    InOut.console_func_begin("filt_by_lat_lon_file " + file)
    fileNorm = join(Config.folderData, Config.folderDataParsed,
                    Config.fileNorm + file)
    fileFilt = join(Config.folderData, Config.folderDataParsed,
                    Config.fileFilt + file)

    keptRows = []
    for row in FileTool.FileTool.ReadLineListFromFile(fileNorm):
        cols = row.split("\t")
        lat = cols[latCol]
        lon = cols[lonCol]
        inArea = Venue.isInFiltArea(Config.filtCountry, lat, lon)
        # The explicit comparison is intentional: only a result equal to
        # True selects the row, not any truthy value.
        if inArea == True:
            keptRows.append(row)
    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, fileFilt)
def filt_by_lat_lon_socialgraph(self):
    """Keep the social-graph edges whose endpoints are both kept users.

    Lines that split into exactly two tab-separated fields are kept when
    both fields, converted with ``Venue.intStrToInt``, belong to the set
    returned by ``self.load_filt_user_set()``.  The result is written to
    the ``Config.fileFilt`` prefixed social-graph file, and a
    ``done/total`` line is printed every 10000 input lines.
    """
    InOut.console_func_begin("filt_by_lat_lon_socialgraph")
    setUser = self.load_filt_user_set()
    graphPath = join(Config.folderData, Config.folderDataParsed,
                     Config.fileSocialGraph)
    filtPath = join(Config.folderData, Config.folderDataParsed,
                    Config.fileFilt + Config.fileSocialGraph)

    graphLines = FileTool.FileTool.ReadLineListFromFile(graphPath)
    keptLines = []
    for seen, graphLine in enumerate(graphLines, 1):
        ends = graphLine.split("\t")
        if len(ends) == 2:
            u1 = Venue.intStrToInt(ends[0])
            u2 = Venue.intStrToInt(ends[1])
            if (u1 in setUser) and (u2 in setUser):
                keptLines.append(graphLine)
        # NOTE(review): the original indentation was lost; the progress
        # report is assumed to run for every line, not only for valid
        # pairs -- confirm.
        if seen % 10000 == 0:
            report = "%s/%s" % (seen, len(graphLines))
            print(report)

    FileTool.FileTool.WriteStrListToFileWithNewLine(keptLines, filtPath)
def filt_by_lat_lon_file(self, file, latCol, lonCol):
    """Write the rows of a normalised file that lie in the filter area.

    Input is the ``Config.fileNorm`` prefixed ``file`` in the parsed-data
    folder; output is the ``Config.fileFilt`` prefixed ``file`` in the
    same folder.  A row is kept when
    ``Venue.isInFiltArea(Config.filtCountry, row[latCol], row[lonCol])``
    compares equal to ``True`` (fields are tab-separated strings).
    """
    InOut.console_func_begin("filt_by_lat_lon_file " + file)
    fileNorm = join(Config.folderData, Config.folderDataParsed,
                    Config.fileNorm + file)
    fileFilt = join(Config.folderData, Config.folderDataParsed,
                    Config.fileFilt + file)

    def inFilterArea(row):
        fields = row.split("\t")
        lat = fields[latCol]
        lon = fields[lonCol]
        # Equality with True is kept on purpose; a merely truthy result
        # does not select the row.
        return Venue.isInFiltArea(Config.filtCountry, lat, lon) == True

    rows = FileTool.FileTool.ReadLineListFromFile(fileNorm)
    keptRows = [row for row in rows if inFilterArea(row)]
    FileTool.FileTool.WriteStrListToFileWithNewLine(keptRows, fileFilt)
def load_dict_friend(cls, flagExp=True, flagAsc=False):
    """Load the friend dict from the experiment/ratio social-graph file.

    :param flagExp: accepted but not used.
    :param flagAsc: forwarded unchanged to
        ``LoadData.load_dict_friend_file``.
    :returns: whatever ``LoadData.load_dict_friend_file`` returns.
    """
    InOut.console_func_begin("load_dict_friend")
    graphFile = join(Config.folderData, Config.folderDataParsed,
                     Config.folderExp, Config.folderRatio,
                     Config.fileSocialGraph)
    return LoadData.load_dict_friend_file(graphFile, flagAsc)
def load_dict_venue(cls, flagExp=True, flagRatio=True):
    """Load the venue dict from the experiment/ratio venue file.

    Both flags are accepted but not used; the file is always
    ``Config.fileVenue`` in the ratio sub-folder of the experiment folder,
    read through ``Venue.loadDictVenueFromNormFile``.
    """
    InOut.console_func_begin("load_dict_venue")
    venueFile = join(Config.folderData, Config.folderDataParsed,
                     Config.folderExp, Config.folderRatio, Config.fileVenue)
    return Venue.loadDictVenueFromNormFile(venueFile)