def calculate_bluetooth_sim(user1, user2):
    """Collect the per-day 'Processed_bluetooth.txt' files of two users.

    For every entry that ``GetDirName().printPath`` reports under
    ``<parent of cwd>/starlog/<user>`` the file ``Processed_bluetooth.txt``
    is looked up and, when it exists, its path is recorded.

    Any exception raised while scanning is printed together with its
    traceback and then swallowed (best-effort behaviour of the original).

    NOTE(review): inside this block the collected lists and ``sum_sim`` are
    never consumed and nothing is returned, so callers receive ``None``.
    Confirm whether the similarity computation is missing.
    """
    sum_sim = 0.0
    UserFile1 = []
    UserFile2 = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog" + os.sep
    user1Floder = getdir.printPath(starlog + user1)
    user2Floder = getdir.printPath(starlog + user2)
    # The doubled separator is kept from the original path layout.
    user1filepath = starlog + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + user2 + os.sep
    try:
        for folder in user1Floder:
            u1file = user1filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u1file):
                UserFile1.append(u1file)
        # Fixed: walk user2's own folders. The original used
        # range(len(user1Floder)) here, which raised IndexError or skipped
        # folders whenever the two users had a different number of days.
        for folder in user2Floder:
            u2file = user2filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u2file):
                UserFile2.append(u2file)
    except Exception as e:
        print(e)
        traceback.print_exc()
def drawfenkai(filename):
    """Draw one picture per user from the interval file ``filename``.

    ``GetDirName().getUserFiles`` is asked for the contents of
    ``<parent of cwd>/starlog``; its third return value is iterated as groups
    of day-folder path suffixes (presumably one group per user -- verify
    against getDir). For every day folder, columns 0-2 of ``filename`` are
    loaded and turned into ``[canculateDate(col1), canculateDate(col2), col0]``
    rows. The rows of all days of a group are passed to ``drewPic`` together
    with the path of the last file read.

    :param filename: file name looked up inside every day folder.
    """
    from getDir import GetDirName
    import os

    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep instead of a hard-coded backslash so the lookup also works
    # outside Windows (on Windows the resulting string is unchanged).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        TimePic = []
        path_file_name = None
        for day_folder in path_file:
            path_file_name = parent_path + day_folder + os.sep + filename
            data = np.loadtxt(path_file_name, dtype=str, delimiter=',',
                              usecols=(0, 1, 2), unpack=False)
            # NOTE(review): the reference date is taken from the second row
            # (index 1), as in the original -- confirm this is intended.
            start = data[1][1].split(' ')[0]
            temp = [
                [canculateDate(row[1], start), canculateDate(row[2], start), row[0]]
                for row in data
            ]
            TimePic.append(temp)
        if path_file_name is None:
            # Group without day folders: nothing to draw. The original hit an
            # undefined (or stale) path_file_name here.
            continue
        print(path_file_name)
        drewPic(TimePic, path_file_name)
def drawfenkai(filename):
    """Draw one picture per user from the interval file ``filename``.

    ``GetDirName().getUserFiles`` is asked for the contents of
    ``<parent of cwd>/starlog``; its third return value is iterated as groups
    of day-folder path suffixes (presumably one group per user -- verify
    against getDir). For every day folder, columns 0-2 of ``filename`` are
    loaded and turned into ``[canculateDate(col1), canculateDate(col2), col0]``
    rows. The rows of all days of a group are passed to ``drewPic`` together
    with the path of the last file read.

    :param filename: file name looked up inside every day folder.
    """
    from getDir import GetDirName
    import os

    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep instead of a hard-coded backslash so the lookup also works
    # outside Windows (on Windows the resulting string is unchanged).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + "starlog")
    for path_file in other:
        TimePic = []
        path_file_name = None
        for day_folder in path_file:
            path_file_name = parent_path + day_folder + os.sep + filename
            data = np.loadtxt(path_file_name, dtype=str, delimiter=",",
                              usecols=(0, 1, 2), unpack=False)
            # NOTE(review): the reference date is taken from the second row
            # (index 1), as in the original -- confirm this is intended.
            start = data[1][1].split(" ")[0]
            temp = [
                [canculateDate(row[1], start), canculateDate(row[2], start), row[0]]
                for row in data
            ]
            TimePic.append(temp)
        if path_file_name is None:
            # Group without day folders: nothing to draw. The original hit an
            # undefined (or stale) path_file_name here.
            continue
        print(path_file_name)
        drewPic(TimePic, path_file_name)
def create_model(labelFileName='RClabelTime.txt'):
    """Train and save a TF-IDF + LDA model over every day's label file.

    Each label file becomes one "sentence" of words shaped
    ``<label>_<date>_<time2hour(time)>_<duration // 600>``. A gensim
    dictionary, TF-IDF model and 24-topic LDA model are built from those
    sentences and saved under ``./LDA_all_27/``.

    :param labelFileName: file name looked up inside every day folder.
    :return: ``(lda, dic, tfidf, train_set, AllUserFiles)`` where
        ``AllUserFiles`` is the first value returned by ``getUserFiles``.
    """
    from getDir import GetDirName
    import os

    num_topics = 24
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    FileName = [
        parent_path + day_folder + os.sep + labelFileName
        for path_file in other
        for day_folder in path_file
    ]

    train_set = []
    # One file is one day's data; each iteration handles one day's records.
    for label_path in FileName:
        # Columns: label, start time, duration. ndmin=2 keeps a one-row file
        # two-dimensional so row indexing does not degrade to characters.
        ondaydata = np.loadtxt(label_path, dtype=str, delimiter=',',
                               usecols=(0, 1, 3), ndmin=2)
        tempsentence = []
        for label, start_time, duration in ondaydata:
            datasstrip = start_time.split(' ')
            word = (label + '_' + datasstrip[0] + '_' + time2hour(datasstrip[1])
                    + '_' + str(int(duration) // (10 * 60)))
            tempsentence.append(word)
        train_set.append(tempsentence)

    dic = corpora.Dictionary(train_set)
    corpus = [dic.doc2bow(text) for text in train_set]
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=num_topics)

    # Evaluates to ".\LDA_all_27\" on Windows, exactly as before.
    model_dir = "." + os.sep + "LDA_all_27" + os.sep
    lda.save(model_dir + "SemanticLda" + str(num_topics) + ".txt")
    dic.save(model_dir + "SemanticDic" + str(num_topics) + ".txt")
    tfidf.save(model_dir + "SemanticTFIDF" + str(num_topics) + ".txt")
    return lda, dic, tfidf, train_set, AllUserFiles
def getAprioriItem(minSupport=0.5, minConfidence=0.8):
    """Run Apriori on every day's 'appriori_sequence.csv' and save the sets.

    For every ``<parent of cwd>/starlog/<dir>/<subdir>`` reported by
    ``GetDirName().printPath`` the file ``appriori_sequence.csv`` is fed to
    ``dataFromFile`` / ``runApriori``. Item sets with more than one element
    are written, in ascending order of support, as comma-separated lines to
    ``sequencefrequence.txt`` in the same folder.

    :param minSupport: minimum support handed to ``runApriori``.
    :param minConfidence: minimum confidence handed to ``runApriori``.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'appriori_sequence.csv')

    for infile in Fulldirlist:  # infile is the path of appriori_sequence.csv
        inFile = dataFromFile(infile)
        items, rules = runApriori(inFile, minSupport, minConfidence)
        out_path = infile.replace('appriori_sequence.csv', 'sequencefrequence.txt')
        # 'with' closes the output file; the original never closed it.
        with open(out_path, 'w') as savefile:
            # Index-based key replaces the Python-2-only tuple-parameter lambda.
            for item, support in sorted(items, key=lambda pair: pair[1]):
                seitem = list(item)
                if len(seitem) > 1:
                    savefile.write(','.join(seitem))
                    savefile.write('\n')
def getAll_label2dic():
    """Build and pickle a dictionary mapping every label tag to an index.

    Column 4 of each ``RCed_stoppoint.txt`` found under
    ``<parent of cwd>/starlog/<dir>/<subdir>`` is read; the distinct tags are
    numbered ``'0', '1', ...`` so that item sets mined on the numbers can be
    translated back to label names. The mapping is written to
    ``alllabelDic.pkl`` in the current directory.

    :return: ``(allLabelSet, allLabelDic)`` -- the list of distinct tags and
        the ``tag -> str(index)`` dictionary.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    print(parent_path)
    starlog = parent_path + os.sep + "starlog"
    dirlist = getdir.printPath(starlog)
    print(dirlist)
    # Fulldirlist holds the path of every user's label-tag file.
    Fulldirlist = []
    for user_dir in dirlist:
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'RCed_stoppoint.txt')

    labels = set()
    for filePath in Fulldirlist:
        # ndmin=1 keeps a one-row file iterable (it would otherwise load as a
        # 0-d array and make set() fail).
        labelTag = np.loadtxt(filePath, dtype=str, delimiter=',',
                              usecols=(4,), ndmin=1)
        labels.update(labelTag)

    # Sorted so the tag -> index assignment is the same on every run.
    allLabelSet = sorted(labels)
    allLabelDic = {label: str(index) for index, label in enumerate(allLabelSet)}
    with open('alllabelDic.pkl', 'wb') as output:
        pickle.dump(allLabelDic, output)
    return allLabelSet, allLabelDic
def calculate_user_sim_onBluetooth():
    """Write the bluetooth similarity of user pairs to the ``ans`` file.

    Users are the distinct third-from-last path components of every existing
    ``bluetooth.txt`` below ``<parent of cwd>/starlog``. For each pair a line
    ``user_i,user_j,result`` is written to ``ans``, a writable object that
    must already exist outside this function.

    NOTE(review): the inner loop starts at ``i``, so every user except the
    last is also compared with itself -- confirm whether ``i + 1`` was meant.
    """
    userlist = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            path_file_name = parent_path + day_folder + os.sep + 'bluetooth.txt'
            if os.path.exists(path_file_name):
                userlist.append(path_file_name.split(os.sep)[-3])
    userlist = list(set(userlist))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(userlist)
    for i in range(len(userlist) - 1):
        for j in range(i, len(userlist)):
            print("%s %s" % (userlist[i], userlist[j]))
            result = calculate_bluetooth_sim(userlist[i], userlist[j])
            ans.write(userlist[i])
            ans.write(',')
            ans.write(userlist[j])
            ans.write(',')
            ans.write(str(result))
            ans.write('\n')
def GetLabelFile():
    """Return the 'semanticGPS_stoppoint.txt' path of every day folder.

    Day folders are the entries ``GetDirName().printPath`` reports under
    ``<parent of cwd>/GPS_Get_PreProcesser``.

    :return: list of path strings, one per reported entry.
    """
    getdir = GetDirName()
    # os.path.join replaces the hard-coded backslashes, so the paths are
    # also valid outside Windows.
    base = os.path.join(os.path.dirname(os.getcwd()), "GPS_Get_PreProcesser")
    return [
        os.path.join(base, day_dir, 'semanticGPS_stoppoint.txt')
        for day_dir in getdir.printPath(base)
    ]
def GetLabelFile():
    """Return the 'semanticGPS_stoppoint.txt' path of every day folder.

    Day folders are the entries ``GetDirName().printPath`` reports under
    ``<parent of cwd>/GPS_Get_PreProcesser``.

    :return: list of path strings, one per reported entry.
    """
    getdir = GetDirName()
    # os.path.join replaces the hard-coded backslashes, so the paths are
    # also valid outside Windows.
    base = os.path.join(os.path.dirname(os.getcwd()), "GPS_Get_PreProcesser")
    return [
        os.path.join(base, day_dir, 'semanticGPS_stoppoint.txt')
        for day_dir in getdir.printPath(base)
    ]
def getfullfilepath():
    """Return the 'locationGPS.txt' path of every day folder.

    Day folders are the entries ``GetDirName().printPath`` reports under
    ``<parent of cwd>/GPS_Get_PreProcesser``.

    :return: list of path strings, one per reported entry.
    """
    getdir = GetDirName()
    # os.path.join replaces the hard-coded backslashes, so the paths are
    # also valid outside Windows.
    base = os.path.join(os.path.dirname(os.getcwd()), "GPS_Get_PreProcesser")
    return [
        os.path.join(base, day_dir, "locationGPS.txt")
        for day_dir in getdir.printPath(base)
    ]
def get_fenlei_user():
    """Return the 'wifi.txt' path for every day folder below starlog.

    The folder suffixes come from the third value returned by
    ``GetDirName().getUserFiles`` and are appended to the parent of the
    current working directory.
    """
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _users, _files, day_groups = lister.getUserFiles(root + os.sep + 'starlog')
    return [
        root + day_folder + os.sep + 'wifi.txt'
        for group in day_groups
        for day_folder in group
    ]
def getfullfilepath():
    """Return the 'locationGPS.txt' path of every day folder.

    Day folders are the entries ``GetDirName().printPath`` reports under
    ``<parent of cwd>/GPS_Get_PreProcesser``.

    :return: list of path strings, one per reported entry.
    """
    getdir = GetDirName()
    # os.path.join replaces the hard-coded backslashes, so the paths are
    # also valid outside Windows.
    base = os.path.join(os.path.dirname(os.getcwd()), "GPS_Get_PreProcesser")
    return [
        os.path.join(base, day_dir, 'locationGPS.txt')
        for day_dir in getdir.printPath(base)
    ]
def GetSemanticGPSpath():
    """Return the 'semanticGPS.txt' path of every day folder.

    semanticGPS.txt is the processed GPS file with time and label added.
    Day folders are whatever ``GetDirName().printPath`` reports under
    ``<parent of cwd>/GPS_Get_PreProcesser``.
    """
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + os.sep + "GPS_Get_PreProcesser"
    return [
        base + os.sep + day_dir + os.sep + 'semanticGPS.txt'
        for day_dir in lister.printPath(base)
    ]
def get_fenlei_user():
    """Return the path of every existing 'bluetooth.txt' below starlog.

    The folder suffixes come from the third value returned by
    ``GetDirName().getUserFiles``; only paths that exist on disk are kept.
    """
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _users, _files, day_groups = lister.getUserFiles(root + os.sep + "starlog")
    candidates = (
        root + day_folder + os.sep + "bluetooth.txt"
        for group in day_groups
        for day_folder in group
    )
    # Trans_btoothjson_txt(path) used to be run here once to create the
    # bluetooth csv files; it stays disabled.
    return [path for path in candidates if os.path.exists(path)]
def getUserFloderList(user0, user1):
    """Compare two users' sorted day folders through ``simHashLabel``.

    The folder lists reported by ``GetDirName().printPath`` for both users
    are sorted in place and handed to ``simHashLabel`` together with each
    user's base path and the length of the shorter list.

    :return: whatever ``simHashLabel`` returns.
    """
    lister = GetDirName()
    log_root = os.path.dirname(os.getcwd()) + os.sep + "starlog" + os.sep
    folders_a = lister.printPath(log_root + user0)
    folders_b = lister.printPath(log_root + user1)
    # Base paths keep the doubled separator after "starlog".
    base_a = log_root + os.sep + user0 + os.sep
    base_b = log_root + os.sep + user1 + os.sep
    shared_days = min(len(folders_a), len(folders_b))
    folders_a.sort()
    folders_b.sort()
    return simHashLabel(base_a, base_b, folders_a, folders_b, shared_days)
def getUserFloderpath():
    """Return what ``printPath`` reports under ``<parent of cwd>/starlog``.

    Despite the name, the value handed back is the listing itself
    (presumably the user folder names -- verify against getDir), not joined
    paths. The original also built a list of full paths that was never
    used; that dead code is removed.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    return getdir.printPath(parent_path + os.sep + "starlog")
def getUserFloderpath():
    """Return what ``printPath`` reports under ``<parent of cwd>/starlog``.

    Despite the name, the value handed back is the listing itself
    (presumably the user folder names -- verify against getDir), not joined
    paths. The original also built a list of full paths that was never
    used; that dead code is removed.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    return getdir.printPath(parent_path + os.sep + "starlog")
def canclulate_wifi(user1, user2):
    """Sum ``daily_wifi_sim`` over the position-wise paired day folders.

    Folder i of ``user1`` is paired with folder i of ``user2`` in the order
    ``GetDirName().printPath`` reports them; surplus folders of the user
    with more days are ignored.

    :return: accumulated similarity, starting from 0.0.
    """
    lister = GetDirName()
    log_root = os.path.dirname(os.getcwd()) + os.sep + "starlog" + os.sep
    days_a = lister.printPath(log_root + user1)
    days_b = lister.printPath(log_root + user2)
    # Base paths keep the doubled separator after "starlog".
    base_a = log_root + os.sep + user1 + os.sep
    base_b = log_root + os.sep + user2 + os.sep
    total = 0.0
    for day_a, day_b in zip(days_a, days_b):
        file_a = base_a + day_a + os.sep + "Downwifi.txt"
        file_b = base_b + day_b + os.sep + "Downwifi.txt"
        total += daily_wifi_sim(file_a, file_b)
    return total
def canclulate_wifi(user1, user2):
    """Sum ``daily_wifi_sim`` over the position-wise paired day folders.

    Folder i of ``user1`` is paired with folder i of ``user2`` in the order
    ``GetDirName().printPath`` reports them; surplus folders of the user
    with more days are ignored.

    :return: accumulated similarity, starting from 0.0.
    """
    lister = GetDirName()
    log_root = os.path.dirname(os.getcwd()) + os.sep + "starlog" + os.sep
    days_a = lister.printPath(log_root + user1)
    days_b = lister.printPath(log_root + user2)
    # Base paths keep the doubled separator after "starlog".
    base_a = log_root + os.sep + user1 + os.sep
    base_b = log_root + os.sep + user2 + os.sep
    total = 0.0
    for day_a, day_b in zip(days_a, days_b):
        file_a = base_a + day_a + os.sep + 'Downwifi.txt'
        file_b = base_b + day_b + os.sep + 'Downwifi.txt'
        total += daily_wifi_sim(file_a, file_b)
    return total
def mian():
    """Detect stay points in every 'location.txt' and label them.

    Two sets of files are processed the same way: first the
    ``location.txt`` next to every path returned by ``getfullfilepath()``,
    then the ``location.txt`` of every per-user day folder below
    ``<parent of cwd>/starlog``. For each file the stay points from
    ``getStayPoint`` are matched to a semantic label and the result is
    handed to ``init_rs_staypoint_time``.
    """
    from Semantics_of_Trajectories import Calculate_semantic_of_point
    from stop_points import getfullfilepath
    from getDir import GetDirName
    import os

    def _process_location_file(location_file, display_name):
        """Run stay-point detection and labelling for one location file."""
        gps_data, timestamp, accur = get_data(location_file)
        labels, SP = getStayPoint(gps_data, timestamp, disthreshold=90, timethreshold=180)
        print(display_name)
        if len(labels) == 0:
            # labels can be empty (already noted in the original comment);
            # duplicating its last element then raised IndexError and aborted
            # the whole run, so such a file is skipped instead.
            print("no labels for %s, skipped" % location_file)
            return
        labels.append(labels[-1])
        labDIC = {}
        for index, point in enumerate(SP):
            labDIC[index] = Calculate_semantic_of_point.Match_semantics(point, 90)
        init_rs_staypoint_time(labels, gps_data, timestamp, accur, SP, labDIC, location_file)

    for gps_path in getfullfilepath():
        _process_location_file(gps_path.replace("locationGPS.txt", "location.txt"), gps_path)

    # Folders of the per-user (categorised) data.
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + "starlog")
    for path_file in other:
        for day_folder in path_file:
            _process_location_file(parent_path + day_folder + os.sep + "location.txt", day_folder)
def mian():
    """Detect stay points in every 'location.txt' and label them.

    Two sets of files are processed the same way: first the
    ``location.txt`` next to every path returned by ``getfullfilepath()``,
    then the ``location.txt`` of every per-user day folder below
    ``<parent of cwd>/starlog``. For each file the stay points from
    ``getStayPoint`` are matched to a semantic label and the result is
    handed to ``init_rs_staypoint_time``.
    """
    from Semantics_of_Trajectories import Calculate_semantic_of_point
    from stop_points import getfullfilepath
    from getDir import GetDirName
    import os

    def _process_location_file(location_file, display_name):
        """Run stay-point detection and labelling for one location file."""
        gps_data, timestamp, accur = get_data(location_file)
        labels, SP = getStayPoint(gps_data, timestamp, disthreshold=90, timethreshold=180)
        print(display_name)
        if len(labels) == 0:
            # labels can be empty (already noted in the original comment);
            # duplicating its last element then raised IndexError and aborted
            # the whole run, so such a file is skipped instead.
            print("no labels for %s, skipped" % location_file)
            return
        labels.append(labels[-1])
        labDIC = {}
        for index, point in enumerate(SP):
            labDIC[index] = Calculate_semantic_of_point.Match_semantics(point, 90)
        init_rs_staypoint_time(labels, gps_data, timestamp, accur, SP, labDIC, location_file)

    for gps_path in getfullfilepath():
        _process_location_file(gps_path.replace('locationGPS.txt', 'location.txt'), gps_path)

    # Folders of the per-user (categorised) data.
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            _process_location_file(parent_path + day_folder + os.sep + 'location.txt', day_folder)
def main():
    """Run ``Read_RC_stoppoint`` on every 'RC_stoppoint.txt'.

    First the files next to every path returned by ``GetSemanticGPSpath()``
    are processed, then those of every per-user day folder below
    ``<parent of cwd>/starlog``. A completion message is printed after each
    of the two passes.
    """
    from label_add_time import GetSemanticGPSpath
    from getDir import GetDirName
    import os

    for semantic_path in GetSemanticGPSpath():
        path_file = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        print(path_file)
        Read_RC_stoppoint(path_file)
    # print() with one argument is valid on both Python 2 and 3.
    print('ok.....have process over')

    # Folders of the per-user (categorised) data.
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            Read_RC_stoppoint(parent_path + day_folder + os.sep + 'RC_stoppoint.txt')
    print('ok.....have process over')
def initAprioriitem():
    """Split each day's labels into three time buckets and save them as CSV.

    Columns 3 and 4 of every ``RCed_stoppoint.txt`` below
    ``<parent of cwd>/starlog/<dir>/<subdir>`` are read; a row looks like
    ``['2015-07-07 00:00:00', '5_bedroom']``. ``str2timeNum`` turns the time
    into a number and the label goes to the morning (0..480), noon
    (480..960] or night (960..1440) bucket; values outside those ranges are
    dropped. The three buckets are written as the three rows of
    ``appriori_sequence.csv`` in the same folder.

    NOTE(review): the bucket limits suggest minutes since midnight, while the
    original comment speaks of 30-minute slices -- confirm with str2timeNum.
    """
    import sys

    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'RCed_stoppoint.txt')

    for path in Fulldirlist:
        # ndmin=2 keeps a one-row file two-dimensional.
        data = np.loadtxt(path, dtype=str, delimiter=',', usecols=(3, 4), ndmin=2)
        data_moring = []
        data_noon = []
        data_night = []
        for start_time, label in data:
            labtltime = str2timeNum(start_time)
            if 0 <= labtltime <= 8 * 60:
                data_moring.append(label)
            elif 8 * 60 < labtltime <= 16 * 60:
                data_noon.append(label)
            elif 16 * 60 < labtltime < 24 * 60:
                data_night.append(label)

        out_path = path.replace('RCed_stoppoint.txt', 'appriori_sequence.csv')
        # The csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] < 3:
            savefile = open(out_path, 'wb')
        else:
            savefile = open(out_path, 'w', newline='')
        with savefile:
            writer = csv.writer(savefile)
            writer.writerow(data_moring)
            writer.writerow(data_noon)
            writer.writerow(data_night)
def RC_Label_Time_process():
    """Add RC timestamps to every 'RC_stoppoint.txt' and write them back.

    For each file, ``ldaHelper().Add_RCtimestamp(path)`` is computed and
    passed to ``write_labelTime2file`` together with the same path. The
    files next to the ``GetSemanticGPSpath()`` entries are handled first,
    then those of every per-user day folder below ``<parent of cwd>/starlog``.
    """
    from getDir import GetDirName
    import os

    filelist = GetSemanticGPSpath()
    lda = ldaHelper()
    for semantic_path in filelist:
        f = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        write_labelTime2file(lda.Add_RCtimestamp(f), f)
        print('have done %s' % f)

    # Folders of the per-user (categorised) data.
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            path_file_name = parent_path + day_folder + os.sep + 'RC_stoppoint.txt'
            write_labelTime2file(lda.Add_RCtimestamp(path_file_name), path_file_name)
def main():
    """Run ``Read_RC_stoppoint`` on every 'RC_stoppoint.txt'.

    First the files next to every path returned by ``GetSemanticGPSpath()``
    are processed, then those of every per-user day folder below
    ``<parent of cwd>/starlog``. A completion message is printed after each
    of the two passes.
    """
    from label_add_time import GetSemanticGPSpath
    from getDir import GetDirName
    import os

    for semantic_path in GetSemanticGPSpath():
        path_file = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        print(path_file)
        Read_RC_stoppoint(path_file)
    # print() with one argument is valid on both Python 2 and 3.
    print('ok.....have process over')

    # Folders of the per-user (categorised) data.
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            Read_RC_stoppoint(parent_path + day_folder + os.sep + 'RC_stoppoint.txt')
    print('ok.....have process over')
def getAll_label2dic():
    """Build and pickle a dictionary mapping every label tag to an index.

    Column 4 of each ``RCed_stoppoint.txt`` found under
    ``<parent of cwd>/starlog/<dir>/<subdir>`` is read; the distinct tags are
    numbered ``'0', '1', ...`` so that item sets mined on the numbers can be
    translated back to label names. The mapping is written to
    ``alllabelDic.pkl`` in the current directory.

    :return: ``(allLabelSet, allLabelDic)`` -- the list of distinct tags and
        the ``tag -> str(index)`` dictionary.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    print(parent_path)
    starlog = parent_path + os.sep + "starlog"
    dirlist = getdir.printPath(starlog)
    print(dirlist)
    # Fulldirlist holds the path of every user's label-tag file.
    Fulldirlist = []
    for user_dir in dirlist:
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'RCed_stoppoint.txt')

    labels = set()
    for filePath in Fulldirlist:
        # ndmin=1 keeps a one-row file iterable (it would otherwise load as a
        # 0-d array and make set() fail).
        labelTag = np.loadtxt(filePath, dtype=str, delimiter=',',
                              usecols=(4, ), ndmin=1)
        labels.update(labelTag)

    # Sorted so the tag -> index assignment is the same on every run.
    allLabelSet = sorted(labels)
    allLabelDic = {label: str(index) for index, label in enumerate(allLabelSet)}
    with open('alllabelDic.pkl', 'wb') as output:
        pickle.dump(allLabelDic, output)
    return allLabelSet, allLabelDic
def _read_item_rows(path):
    """Return ``(flat_items, rows)`` parsed from a comma-separated file."""
    flat_items = []
    rows = []
    # 'with' closes the file; the original iterated a bare open() call.
    with open(path) as handle:
        for line in handle:
            fields = line.replace("\n", "").split(',')
            flat_items.extend(fields)
            rows.append(fields)
    return flat_items, rows


def frequence_mod_sim(user1, user2):
    """Sum ``currentFileSim`` over the paired days of two users.

    Folder i of ``user1`` is paired with folder i of ``user2`` in the order
    ``GetDirName().printPath`` reports them; surplus folders of the user
    with more days are ignored. For each pair both ``itemfrequence.txt``
    files are read as a flat item list plus a list of per-line item lists.

    :return: accumulated similarity, starting from 0.0.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog" + os.sep
    user1Floder = getdir.printPath(starlog + user1)
    user2Floder = getdir.printPath(starlog + user2)
    # Base paths keep the doubled separator of the original layout.
    user1filepath = starlog + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + user2 + os.sep

    similary_dgree = 0.0
    for folder1, folder2 in zip(user1Floder, user2Floder):
        # Each user's item-frequency data for that day.
        t_u1_file = user1filepath + folder1 + os.sep + 'itemfrequence.txt'
        t_u2_file = user2filepath + folder2 + os.sep + 'itemfrequence.txt'
        listuser1, listinlisetuser1 = _read_item_rows(t_u1_file)
        listuser2, listinlisetuser2 = _read_item_rows(t_u2_file)
        similary_dgree += currentFileSim(listuser1, listuser2,
                                         listinlisetuser1, listinlisetuser2)
    return similary_dgree
def create_model(labelFileName='RClabelTime.txt'):
    """Train and save a TF-IDF + LDA model over every day's label file.

    Each label file becomes one "sentence" of words shaped
    ``<label>_<date>_<time2hour(time)>_<duration // 600>``. A gensim
    dictionary, TF-IDF model and 24-topic LDA model are built from those
    sentences and saved under ``./LDA_all_27/``.

    :param labelFileName: file name looked up inside every day folder.
    :return: ``(lda, dic, tfidf, train_set, AllUserFiles)`` where
        ``AllUserFiles`` is the first value returned by ``getUserFiles``.
    """
    from getDir import GetDirName
    import os

    num_topics = 24
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    # os.sep replaces the hard-coded backslash (same string on Windows).
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    FileName = [
        parent_path + day_folder + os.sep + labelFileName
        for path_file in other
        for day_folder in path_file
    ]

    train_set = []
    # One file is one day's data; each iteration handles one day's records.
    for label_path in FileName:
        # Columns: label, start time, duration. ndmin=2 keeps a one-row file
        # two-dimensional so row indexing does not degrade to characters.
        ondaydata = np.loadtxt(label_path, dtype=str, delimiter=',',
                               usecols=(0, 1, 3), ndmin=2)
        tempsentence = []
        for label, start_time, duration in ondaydata:
            datasstrip = start_time.split(' ')
            word = (label + '_' + datasstrip[0] + '_' + time2hour(datasstrip[1])
                    + '_' + str(int(duration) // (10 * 60)))
            tempsentence.append(word)
        train_set.append(tempsentence)

    dic = corpora.Dictionary(train_set)
    corpus = [dic.doc2bow(text) for text in train_set]
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=num_topics)

    # Evaluates to ".\LDA_all_27\" on Windows, exactly as before.
    model_dir = "." + os.sep + "LDA_all_27" + os.sep
    lda.save(model_dir + "SemanticLda" + str(num_topics) + ".txt")
    dic.save(model_dir + "SemanticDic" + str(num_topics) + ".txt")
    tfidf.save(model_dir + "SemanticTFIDF" + str(num_topics) + ".txt")
    return lda, dic, tfidf, train_set, AllUserFiles
def collect_all2one():
    """Merge every day's 'sequence_pattern.txt' into one file per user.

    Each ``<parent of cwd>/starlog/<dir>/<subdir>/sequence_pattern.txt`` is
    appended to ``allsequencepattern.txt`` two directory levels up (the
    ``<dir>`` folder). The distinct merged files are then passed to
    ``detect_Sequencepattern`` with a threshold of 0.01.

    NOTE(review): the merged file is opened in append mode, so running this
    twice duplicates its content -- confirm that is acceptable.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'sequence_pattern.txt')

    resultPath = []
    for s in Fulldirlist:
        # Two levels up from the pattern file is the user's folder.
        user_folder = os.path.dirname(os.path.dirname(s))
        temppath = user_folder + os.sep + 'allsequencepattern.txt'
        resultPath.append(temppath)
        # Both handles are closed even on error; the original leaked the
        # input handle.
        with open(s, 'r') as source, open(temppath, 'a') as output:
            for line in source:
                output.write(line)

    resultFloder = list(set(resultPath))
    detect_Sequencepattern(resultFloder, 0.01)
def getAprioriItem(minSupport=0.5, minConfidence=0.8):
    """Run Apriori on every day's 'appriori_sequence.csv' and save the sets.

    For every ``<parent of cwd>/starlog/<dir>/<subdir>`` reported by
    ``GetDirName().printPath`` the file ``appriori_sequence.csv`` is fed to
    ``dataFromFile`` / ``runApriori``. Item sets with more than one element
    are written, in ascending order of support, as comma-separated lines to
    ``sequencefrequence.txt`` in the same folder.

    :param minSupport: minimum support handed to ``runApriori``.
    :param minConfidence: minimum confidence handed to ``runApriori``.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'appriori_sequence.csv')

    for infile in Fulldirlist:  # infile is the path of appriori_sequence.csv
        inFile = dataFromFile(infile)
        items, rules = runApriori(inFile, minSupport, minConfidence)
        out_path = infile.replace('appriori_sequence.csv', 'sequencefrequence.txt')
        # 'with' closes the output file; the original never closed it.
        with open(out_path, 'w') as savefile:
            # Index-based key replaces the Python-2-only tuple-parameter lambda.
            for item, support in sorted(items, key=lambda pair: pair[1]):
                seitem = list(item)
                if len(seitem) > 1:
                    savefile.write(','.join(seitem))
                    savefile.write('\n')
def calculate_bluetooth_sim(user1, user2):
    """Collect the per-day 'Processed_bluetooth.txt' files of two users.

    For every entry that ``GetDirName().printPath`` reports under
    ``<parent of cwd>/starlog/<user>`` the file ``Processed_bluetooth.txt``
    is looked up and, when it exists, its path is recorded.

    Any exception raised while scanning is printed together with its
    traceback and then swallowed (best-effort behaviour of the original).

    NOTE(review): inside this block the collected lists and ``sum_sim`` are
    never consumed and nothing is returned, so callers receive ``None``.
    Confirm whether the similarity computation is missing.
    """
    sum_sim = 0.0
    UserFile1 = []
    UserFile2 = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog" + os.sep
    user1Floder = getdir.printPath(starlog + user1)
    user2Floder = getdir.printPath(starlog + user2)
    # The doubled separator is kept from the original path layout.
    user1filepath = starlog + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + user2 + os.sep
    try:
        for folder in user1Floder:
            u1file = user1filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u1file):
                UserFile1.append(u1file)
        # Fixed: walk user2's own folders. The original used
        # range(len(user1Floder)) here, which raised IndexError or skipped
        # folders whenever the two users had a different number of days.
        for folder in user2Floder:
            u2file = user2filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u2file):
                UserFile2.append(u2file)
    except Exception as e:
        print(e)
        traceback.print_exc()
def calculate_user_sim_onBluetooth():
    """Write the bluetooth similarity of user pairs to the ``ans`` file.

    Users are the distinct third-from-last path components of every existing
    ``bluetooth.txt`` below ``<parent of cwd>/starlog``. For each pair a line
    ``user_i,user_j,result`` is written to ``ans``, a writable object that
    must already exist outside this function.

    NOTE(review): the inner loop starts at ``i``, so every user except the
    last is also compared with itself -- confirm whether ``i + 1`` was meant.
    """
    userlist = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep + 'starlog')
    for path_file in other:
        for day_folder in path_file:
            path_file_name = parent_path + day_folder + os.sep + 'bluetooth.txt'
            if os.path.exists(path_file_name):
                userlist.append(path_file_name.split(os.sep)[-3])
    userlist = list(set(userlist))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(userlist)
    for i in range(len(userlist) - 1):
        for j in range(i, len(userlist)):
            print("%s %s" % (userlist[i], userlist[j]))
            result = calculate_bluetooth_sim(userlist[i], userlist[j])
            ans.write(userlist[i])
            ans.write(',')
            ans.write(userlist[j])
            ans.write(',')
            ans.write(str(result))
            ans.write('\n')
def initAprioriitem():
    """Split each day's labels into three time buckets and save them as CSV.

    Columns 3 and 4 of every ``RCed_stoppoint.txt`` below
    ``<parent of cwd>/starlog/<dir>/<subdir>`` are read; a row looks like
    ``['2015-07-07 00:00:00', '5_bedroom']``. ``str2timeNum`` turns the time
    into a number and the label goes to the morning (0..480), noon
    (480..960] or night (960..1440) bucket; values outside those ranges are
    dropped. The three buckets are written as the three rows of
    ``appriori_sequence.csv`` in the same folder.

    NOTE(review): the bucket limits suggest minutes since midnight, while the
    original comment speaks of 30-minute slices -- confirm with str2timeNum.
    """
    import sys

    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep + day_dir
                               + os.sep + 'RCed_stoppoint.txt')

    for path in Fulldirlist:
        # ndmin=2 keeps a one-row file two-dimensional.
        data = np.loadtxt(path, dtype=str, delimiter=',', usecols=(3, 4), ndmin=2)
        data_moring = []
        data_noon = []
        data_night = []
        for start_time, label in data:
            labtltime = str2timeNum(start_time)
            if 0 <= labtltime <= 8 * 60:
                data_moring.append(label)
            elif 8 * 60 < labtltime <= 16 * 60:
                data_noon.append(label)
            elif 16 * 60 < labtltime < 24 * 60:
                data_night.append(label)

        out_path = path.replace('RCed_stoppoint.txt', 'appriori_sequence.csv')
        # The csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] < 3:
            savefile = open(out_path, 'wb')
        else:
            savefile = open(out_path, 'w', newline='')
        with savefile:
            writer = csv.writer(savefile)
            writer.writerow(data_moring)
            writer.writerow(data_noon)
            writer.writerow(data_night)
def _read_item_rows(path):
    """Return ``(flat_items, rows)`` parsed from a comma-separated file."""
    flat_items = []
    rows = []
    # 'with' closes the file; the original iterated a bare open() call.
    with open(path) as handle:
        for line in handle:
            fields = line.replace("\n", "").split(",")
            flat_items.extend(fields)
            rows.append(fields)
    return flat_items, rows


def frequence_mod_sim(user1, user2):
    """Sum ``currentFileSim`` over the paired days of two users.

    Folder i of ``user1`` is paired with folder i of ``user2`` in the order
    ``GetDirName().printPath`` reports them; surplus folders of the user
    with more days are ignored. For each pair both ``itemfrequence.txt``
    files are read as a flat item list plus a list of per-line item lists.

    :return: accumulated similarity, starting from 0.0.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog" + os.sep
    user1Floder = getdir.printPath(starlog + user1)
    user2Floder = getdir.printPath(starlog + user2)
    # Base paths keep the doubled separator of the original layout.
    user1filepath = starlog + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + user2 + os.sep

    similary_dgree = 0.0
    for folder1, folder2 in zip(user1Floder, user2Floder):
        # Each user's item-frequency data for that day.
        t_u1_file = user1filepath + folder1 + os.sep + "itemfrequence.txt"
        t_u2_file = user2filepath + folder2 + os.sep + "itemfrequence.txt"
        listuser1, listinlisetuser1 = _read_item_rows(t_u1_file)
        listuser2, listinlisetuser2 = _read_item_rows(t_u2_file)
        similary_dgree += currentFileSim(listuser1, listuser2,
                                         listinlisetuser1, listinlisetuser2)
    return similary_dgree
file_processGPS.writelines(',') file_processGPS.writelines('0.0') file_processGPS.write('\n') file_processGPS.close() def drewgps(weidu, jindu, date): fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(jindu, weidu, c='r', marker='.') plt.title('location of : %s' % date) plt.savefig('.\\GPS_pic\\' + date + '.png', dpi=800) plt.close() #plt.show() if __name__ == '__main__': getdir = GetDirName() dirlist = [] dirlist = getdir.printPath(".\\GPS_Get_PreProcesser") print(dirlist) for wenjianjia in dirlist: print ".\\GPS_Get_PreProcesser" + '\\' + wenjianjia + '\\' + 'location.txt', wenjianjia readfile( ".\\GPS_Get_PreProcesser" + '\\' + wenjianjia + '\\' + 'location.txt', wenjianjia) #下面是写分开每个用户的结果哦 AllUserFiles, AllFiles, other = getdir.getUserFiles() for wenjianjia in AllFiles: readfile(wenjianjia + os.sep + 'location.txt', wenjianjia)
def getFolderNum(username):
    """Print what ``GetDirName().printPath`` reports for ``username``.

    Nothing is returned; the listing is only printed.
    """
    listing = GetDirName().printPath(username)
    print(listing)
file_processGPS.writelines(',') file_processGPS.writelines(i[1]) file_processGPS.writelines(',') file_processGPS.writelines(i[2]) file_processGPS.writelines(',') file_processGPS.writelines(i[3]) file_processGPS.writelines(',') file_processGPS.writelines('0.0') file_processGPS.write('\n') file_processGPS.close() def drewgps(weidu,jindu,date): fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(jindu,weidu,c='r',marker='.') plt.title('location of : %s' % date) plt.savefig('.\\GPS_pic\\'+date+'.png',dpi=800) plt.close() #plt.show() if __name__=='__main__': getdir=GetDirName() dirlist=[] dirlist=getdir.printPath(".\\GPS_Get_PreProcesser") print(dirlist) for wenjianjia in dirlist: print ".\\GPS_Get_PreProcesser"+ '\\' + wenjianjia + '\\' + 'location.txt',wenjianjia readfile(".\\GPS_Get_PreProcesser"+ '\\' + wenjianjia + '\\' + 'location.txt',wenjianjia) #下面是写分开每个用户的结果哦 AllUserFiles,AllFiles,other=getdir.getUserFiles() for wenjianjia in AllFiles: readfile(wenjianjia + os.sep+ 'location.txt',wenjianjia)
writeans([temppath_index,tt,re]) print('-----------------------我是分割线-----------------------------') def writeans(out): output=open('network.txt','a+') output.write(str(out[0])) output.write(',') output.write(str(out[1])) output.write(',') output.write(str(out[2])) output.write('\n') output.close() if __name__=='__main__': Fulldirlist=[] getdir=GetDirName() dirlist=getdir.printPath(".\\GPS_Get_PreProcesser") for dir in dirlist: Fulldirlist.append(".\\GPS_Get_PreProcesser"+"\\"+dir+"\\"+'RCed_stoppoint.txt') print(Fulldirlist) ''' fullpath=stop_points.getfullfilepath() tra1,tra2=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\10-21-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-20-2015\\locationGPS.txt") testdtw(tra1,tra2) tra3,tra4=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\7-1-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-20-2015\\locationGPS.txt") testdtw(tra3,tra4) ''' #tra1,tra2=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\10-01-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-02-2015\\locationGPS.txt")
savefile.write(seitem[len(seitem) - 1]) savefile.write('\n') #savefile=open(infile.replace('appriori_sequence.csv','sequencefrequence.txt'),'w') # for subitem in # savefile.write(str(sequence_Frequence)) # savefile.close() #return sequence_Frequence if __name__ == '__main__': #initAprioriitem() # # print getAll_label2dic() getdir = GetDirName() Fulldirlist = [] parent_path = os.path.dirname(os.getcwd()) #print(parent_path) dirlist = getdir.printPath(parent_path + os.sep + "starlog") #print(dirlist) for dir in dirlist: #print(dir) seconddir = (getdir.printPath(parent_path + os.sep + "starlog" + os.sep + dir)) for secdir in seconddir: Fulldirlist.append(parent_path + os.sep + "starlog" + os.sep + dir + os.sep + secdir + os.sep + 'RCed_stoppoint.txt') for i in Fulldirlist:
if __name__=='__main__':
    # Script entry point: collect the RCed_stoppoint.txt path of every
    # <parent of cwd>/starlog/<dir>/<subdir> folder and run getFrequentItem
    # on each of them.
    # (initAprioriitem() and getAll_label2dic() were run from here before
    # and are currently disabled.)
    getdir=GetDirName()
    Fulldirlist=[]
    parent_path = os.path.dirname(os.getcwd())
    # First level below starlog.
    dirlist=getdir.printPath(parent_path+os.sep+"starlog")
    for dir in dirlist:
        # Second level: the entries inside each first-level folder.
        seconddir=(getdir.printPath(parent_path+os.sep+"starlog"+os.sep+dir))
        for secdir in seconddir:
            Fulldirlist.append(parent_path+os.sep+"starlog"+os.sep+dir+os.sep+secdir+os.sep+'RCed_stoppoint.txt')
    for i in Fulldirlist:
        print(i)
        # The two results are rebound on every iteration; nothing visible in
        # this block consumes them afterwards.
        dic_labelTag,frequentSet= getFrequentItem(i)