def pfppic():
    if 'username' not in session:
        return redirect('/')
    url = request.form['image']
    reader.write_file('util/pfpimg.txt', session['username'] + ',' + url + '\n', 'a')
    return redirect('/account/' + session['username'])
def sendmessage(usr):
    reader.write_file('./util/' + usr + 'message.txt', '', 'a')
    url = '/account/' + usr + '/sendmessage'
    if 'username' not in session:
        return redirect('/')
    user_list = reader.getCsvDict('./util/credentials.txt').keys()
    messages = reader.read_file('./util/' + usr + 'message.txt')
    messages = messages.split('\n')
    messages.pop(-1)
    if messages == ['']:
        out = False
    else:
        out = True
    if request.method == 'GET':
        return render_template('messages.html', dir=url, messages=messages, out=out)
    elif request.method == 'POST':
        if request.form['recipient'] not in user_list:
            return render_template('messages.html', dir=url, messages=messages, out=out)
        mess.sendMessage(session['username'], request.form['recipient'], request.form['message'])
        return redirect(url)
def account(usr):
    if 'username' not in session:
        return redirect('/')
    user_list = reader.getCsvDict("./util/credentials.txt")
    if usr not in user_list.keys():
        return render_template(
            "error.html",
            error="The username you have provided does not exist.",
            globe=globe)
    img = reader.getCsvDict('util/pfpimg.txt')
    userinfo = user_list[usr]
    gender = userinfo[1]
    Countryin = userinfo[2]
    Target = userinfo[3]
    url = '/account/' + session['username'] + '/settings'
    if session['username'] == usr:
        own = True
    else:
        own = False
    if usr in img:
        img = img[usr][0]
    else:
        img = 'http://s3-static-ak.buzzfed.com/static/2014-07/14/12/campaign_images/webdr09/meet-lunita-the-cutest-baby-sloth-on-planet-earth-2-9684-1405357019-4_big.jpg'
    return render_template("account.html", user=usr, user_list=user_list, globe=globe,
                           img=img, gender=gender, Country=Countryin, target=Target,
                           own=own, dir=url)
def genName(self):
    # ensure proper encoding
    # read file, needs to be encoded by utf_8
    f = codecs.open(DATA_PATH + "NORDIC_H.txt", encoding='utf_8')
    firstName = Reader.random_line(f)
    firstName = firstName.rstrip()
    f = codecs.open(DATA_PATH + "NORDIC_H.txt", encoding='utf_8')
    lastName = Reader.random_line(f)
    lastName = lastName.rstrip()
    return firstName + " " + lastName
def getPiePieces():
    """Classifies the relative time difference into pieces (intervals)
    used for drawing the pie chart."""
    taxis = reader.readAnalysisInfo()
    pieces = [0, 0, 0, 0, 0, 0]
    for taxi in taxis:
        try:
            diff = getTimeDiff(taxi.getSteps())
        except TypeError as e:
            print("Error by taxi %s : %s" % (taxi.id, e.message))
        # classify the relative time difference
        # <10%, 10%-30%, 30%-50%, 50%-70%, 70%-90%, >90%
        if diff < 10:
            pieces[0] += 1
        elif diff < 30:
            pieces[1] += 1
        elif diff < 50:
            pieces[2] += 1
        elif diff < 70:
            pieces[3] += 1
        elif diff < 90:
            pieces[4] += 1
        else:
            pieces[5] += 1
    print(pieces)
    print(sum(pieces))
    return pieces
def home():
    if 'username' not in session:
        return redirect('/')
    user_list = reader.getCsvDict('./util/credentials.txt')
    current = user_list[session['username']][3]
    del user_list[session['username']]
    g = 0
    rect = False
    rec = []
    for i in user_list.keys():
        # compare the target field (column 3) against the current user's target
        if user_list[i][3] == current:
            rec.append(user_list.items()[g][0])
            rect = True
        g += 1
    if rec != []:
        rec = choice(rec)
    usr = session['username']
    url = '/account/' + usr + '/sendmessage'
    if not rect:
        return render_template('home.html', user=session['username'],
                               prof='/account/' + session['username'],
                               recomended=rect, dir=url)
    return render_template('home.html', user=session['username'],
                           prof='/account/' + session['username'],
                           rec='/account/' + rec, recomended=rect, dir=url)
def getBars():
    """Classifies the time difference in single bars."""
    taxis = reader.readAnalysisInfo(WEE)
    barsDict = {}
    barsDictSim = {}
    stdDev = []
    mw = []
    for taxi in taxis:
        if len(taxi.getSteps()) < 1:
            continue
        try:
            # diff=getTimeDiff(taxi.getSteps(),False)
            diffSim, fcd, sim, no = getTimeDiff(taxi.getSteps())
            # anna
            if diffSim > 150:
                print(diffSim, " ", taxi.id, " ", no, " ", fcd, " ", sim)
            # standard deviation
            stdDev.append((diffSim - 9.46) * (diffSim - 9.46))
            mw.append(diffSim)
            # classify the absolute time difference
            # barsDict[(diff/10)*10]=barsDict.setdefault((diff/10)*10,0)+1
            barsDictSim[(diffSim / 10) * 10] = barsDictSim.setdefault(
                (diffSim / 10) * 10, 0) + 1
        except TypeError as e:
            tueNichts = True
            # print "Error by taxi %s : %s" %(taxi.id,e.message)
    print("mw", sum(mw) / (len(mw) + 0.0))  # 9.46
    print("standard deviation ", sqrt(sum(stdDev) / (len(stdDev) + 0.0)))
    return (barsDictSim, barsDict)
def readFCDCompleteOLD(fcdPath):
    """Reads the FCD file and creates a list of ids, each with its list of data tuples."""
    # reset all
    global taxis, routes, vlsEdges, taxiIdDict, fcdDict
    taxis = []
    routes = []
    vlsEdges = []
    taxiIdDict = {}
    fcdDict = {}
    vlsEdges = reader.readVLS_Edges()
    inputFile = open(fcdPath, 'r')
    for line in inputFile:
        words = line.split("\t")
        # add route
        taxiId = getTaxiId(words[4])
        if taxiId in taxis:
            if words[1] in vlsEdges:
                # routes[taxis.index(taxiId)].append(words[1])
                fcdDict[taxiId].append(
                    (getTimeInSecs(words[0]), words[1], words[2]))
            else:
                taxiIdDict[words[4]] += 1
        # if the edge is in the VLS-Area a new route is created
        elif words[1] in vlsEdges:
            taxis.append(taxiId)
            # departTime
            # routes.append([(int)(mktime(strptime(words[0],format))-simDate),words[1]])
            fcdDict[taxiId] = [(getTimeInSecs(words[0]), words[1], words[2])]
    inputFile.close()
    return fcdDict
def readFCD():
    """Reads the FCD and creates a list of Taxis and for each a list of routes"""
    vlsEdges = reader.readVLS_Edges()
    inputFile = open(path.fcd, 'r')
    for line in inputFile:
        words = line.split("\t")
        # add route
        taxiId = getTaxiId(words[4])
        actTime = getTimeInSecs(words[0])
        if taxiId in taxis:
            prevTime = routes[taxis.index(taxiId)][-1][0]
            # check that the time does not lie too far from the previous step
            if words[1] in vlsEdges and (actTime - prevTime) < 180:
                routes[taxis.index(taxiId)].append((actTime, words[1]))
            # if time diff >3min add a new taxiId and start a new route
            elif words[1] in vlsEdges:
                taxiIdDict[words[4]] += 1  # create new taxiId
                taxis.append(getTaxiId(words[4]))  # append new created id
                # append new list (list will be filled with edges)
                routes.append([(actTime, words[1])])
            else:
                taxiIdDict[words[4]] += 1
        # if the edge is in the VLS-Area a new route is created
        elif words[1] in vlsEdges:
            taxis.append(taxiId)
            # departTime
            routes.append([(actTime, words[1])])
    inputFile.close()
    print len(taxis)
def getAveragedValues(interval):
    """Catches all data in the given interval steps and calculates the
    average speed for each interval."""
    timeValues = range(0, 86410, interval)
    fcdValues = [[] for i in range(0, 86410, interval)]
    simFcdValues = [[] for i in range(0, 86410, interval)]
    vtypeValues = [[] for i in range(0, 86410, interval)]
    relErrorValues = [[] for i in range(0, 86410, interval)]
    absErrorValues = [[] for i in range(0, 86410, interval)]
    fcdValuesNo = [set() for i in range(0, 86410, interval)]
    simFcdValuesNo = [set() for i in range(0, 86410, interval)]
    vtypeValuesNo = [set() for i in range(0, 86410, interval)]
    taxis = reader.readAnalysisInfo(WEE)

    # helper function
    def calcAverageOrLen(list, no=False):
        for i in range(len(list)):
            if len(list[i]) > 0:
                if no:  # if no is True calc the length instead of the average
                    list[i] = len(list[i])
                else:
                    list[i] = sum(list[i]) / len(list[i])
            else:
                list[i] = None
        return list

    for taxi in taxis:
        for step in taxi.getSteps():
            if step.source == SOURCE_FCD:
                # add the speed to the corresponding time interval
                fcdValues[step.time / interval].append(step.speed)
                fcdValuesNo[step.time / interval].add(taxi.id)
            elif step.source == SOURCE_SIMFCD:
                # add the speed to the corresponding time interval
                simFcdValues[step.time / interval].append(step.speed)
                simFcdValuesNo[step.time / interval].add(taxi.id)
            elif step.source == SOURCE_VTYPE:
                # add the speed to the corresponding time interval
                vtypeValues[step.time / interval].append(step.speed)
                vtypeValuesNo[step.time / interval].add(taxi.id)

    vtypeValues = calcAverageOrLen(vtypeValues)
    fcdValues = calcAverageOrLen(fcdValues)
    simFcdValues = calcAverageOrLen(simFcdValues)
    vtypeValuesNo = calcAverageOrLen(vtypeValuesNo, True)
    fcdValuesNo = calcAverageOrLen(fcdValuesNo, True)
    simFcdValuesNo = calcAverageOrLen(simFcdValuesNo, True)

    # calc relative Error
    for i in range(len(fcdValues)):
        if simFcdValues[i] is None or fcdValues[i] is None:
            relErrorValues[i] = None
            absErrorValues[i] = None
        else:
            # (displayed value - true value)
            absErr = simFcdValues[i] - fcdValues[i]
            relErrorValues[i] = absErr / float(fcdValues[i]) * 100
            absErrorValues[i] = absErr
    return ([timeValues, fcdValues, simFcdValues, vtypeValues, fcdValuesNo,
             simFcdValuesNo, vtypeValuesNo, relErrorValues, absErrorValues], interval)
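# A hedged usage sketch for getAveragedValues() above (not part of the original
# source): `interval` is a bucket size in seconds over roughly one day (hence the
# 86410 upper bound), so a 300-second interval yields 5-minute averages. The
# unpacking below simply mirrors the order of the returned list.
series, usedInterval = getAveragedValues(300)
timeValues, fcdValues, simFcdValues = series[0], series[1], series[2]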
def main():
    print "start program"
    global taxis, edgeDict
    # load data
    edgeDict = load(open(path.edgeLengthDict, 'r'))
    taxis = reader.readAnalysisInfo(WEE)
    plotAllTaxis()
    # plotIt(taxiId)
    # reader.readEdgesLength()
    print "end"
def generateVLS_FCD_File():
    """Creates a new FCD file which contains only the rows whose edges belong to the VLS-Area"""
    outputVLSFile = open(path.vls, 'w')
    inputFile = open(path.fcd, 'r')
    vlsEdgeList = reader.readVLS_Edges()
    for line in inputFile:
        words = line.split("\t")
        # check if edge belongs to the VLS-Area
        if words[1] in vlsEdgeList:
            outputVLSFile.write(line)
    inputFile.close()
    outputVLSFile.close()
def getBarsMulti():
    """Classifies the time difference in single bars. Unlike getBars(), it uses
    several analysis files and calculates a mean value."""
    fileIter = iglob(
        path.newPath(
            path.main,
            "auswertung/reisezeit/analysisFiles/taxiAnalysisInformation*.xml"))
    fcdDiffDict = {}
    simDiffDict = {}
    barsDict = {}
    barsDictSim = {}
    stdDev = []
    mw = []
    # calc diffs
    for file in fileIter:  # for each file
        path.analysisWEE = path.newPath(file)
        print(path.analysisWEE)
        taxis = reader.readAnalysisInfo(WEE)
        for taxi in taxis:
            if len(taxi.getSteps()) < 1:
                continue
            try:
                # diff=getTimeDiff(taxi.getSteps(),False)
                diffSim, fcd, sim, no = getTimeDiff(taxi.getSteps())
                simDiffDict.setdefault(taxi.id, []).append(sim)
                fcdDiffDict.setdefault(taxi.id, fcd)
            except TypeError as e:
                tueNichts = True
                # print "Error by taxi %s : %s" %(taxi.id,e.message)

    for taxi, simList in simDiffDict.iteritems():
        simDiffDict[taxi] = sum(simList) / (len(simList) + 0.0)

    # create barsDict
    for taxi in fcdDiffDict:
        fcd = fcdDiffDict[taxi]
        sim = simDiffDict[taxi]
        diff = sim - fcd
        relDiff = int(round(((100.0 * diff) / fcd)))
        barsDictSim[(relDiff / 10) * 10] = barsDictSim.setdefault(
            (relDiff / 10) * 10, 0) + 1
        # standard deviation
        stdDev.append((relDiff - 9.53) * (relDiff - 9.53))
        mw.append(relDiff)
    print("mw", sum(mw) / (len(mw) + 0.0))  # 9.91 #kor 0.48
    print("standard deviation ", sqrt(sum(stdDev) / (len(stdDev) + 0.0)))
    return (barsDictSim, barsDict)
def delete():
    if request.method == 'GET':
        reader.write_file('./util/' + session['username'] + 'message.txt', '')
    else:
        if request.method == 'POST':
            old = reader.getCsvList('./util/' + session['username'] + 'message.txt')
            old.pop([int(request.form.keys()[0])][0])
            reader.write_file('./util/' + session['username'] + 'message.txt', '')
            old.pop()
            for mess in old:
                reader.write_file('./util/' + session['username'] + 'message.txt',
                                  mess[0] + '\n', 'a')
    return redirect('/account/' + session['username'] + '/sendmessage')
def clacAvg():
    durationList = []
    taxis = reader.readAnalysisInfo()
    for taxi in taxis:
        try:
            dur = getTimeDiff(taxi.getSteps())
            durationList.append(dur)
            if dur >= 1479:
                print("maxtaxi", taxi)
        except TypeError as e:
            print("Error by taxi %s : %s" % (taxi.id, e.message))
    print("no", len(durationList))
    print("avg", sum(durationList) / (len(durationList) + 0.0), "s =", end=" ")
    CalcTime.getSecsInTime(int(round(sum(durationList) / (len(durationList) + 0.0))))
    print("min", min(durationList), "s =", end=" ")
    CalcTime.getSecsInTime(min(durationList))
    print("max", max(durationList), "s =", end=" ")
    CalcTime.getSecsInTime(max(durationList))
def getDataForTaxi(taxiId):
    """Gets the Data for a single Taxi"""
    values = [[], [], [], [], [], []]  # x1,y1,x2,y2,x3,y3
    starttime = 0
    taxis = reader.readAnalysisInfo(WEE)
    for step in taxis[taxis.index(taxiId)].getSteps():
        if step.source == SOURCE_FCD:
            values[0].append(step.time - starttime)
            values[1].append(step.speed)
        elif step.source == SOURCE_SIMFCD:
            values[2].append(step.time - starttime)
            values[3].append(step.speed)
        elif step.source == SOURCE_VTYPE:
            if starttime == 0:
                starttime = step.time
            values[4].append(step.time - starttime)
            values[5].append(step.speed)
    return (values, starttime)
def readFCDComplete(fcdPath):
    """Reads the FCD and creates a list of Taxis and for each a list of routes"""
    # reset all
    global taxis, routes, vlsEdges, taxiIdDict, fcdDict
    taxis = []
    routes = []
    vlsEdges = []
    taxiIdDict = {}
    fcdDict = {}
    vlsEdges = reader.readVLS_Edges()
    inputFile = open(path.fcd, 'r')
    for line in inputFile:
        words = line.split("\t")
        # add route
        taxiId = getTaxiId(words[4])
        actTime = getTimeInSecs(words[0])
        if taxiId in taxis:
            # prevTime=routes[taxis.index(taxiId)][-1][0]
            prevTime = fcdDict[taxiId][-1][0]
            # check that the time does not lie too far from the previous step
            if words[1] in vlsEdges and (actTime - prevTime) < 180:
                # routes[taxis.index(taxiId)].append((actTime, words[1]))
                fcdDict[taxiId].append((actTime, words[1], words[2]))
            # if time diff >3min add a new taxiId and start a new route
            elif words[1] in vlsEdges:
                taxiIdDict[words[4]] += 1  # create new taxiId
                taxis.append(getTaxiId(words[4]))  # append new created id
                # append new list (list will be filled with edges)
                fcdDict[getTaxiId(words[4])] = [(actTime, words[1], words[2])]
            else:
                taxiIdDict[words[4]] += 1
        # if the edge is in the VLS-Area a new route is created
        elif words[1] in vlsEdges:
            taxis.append(taxiId)
            # departTime
            # routes.append([(actTime,words[1])])
            fcdDict[taxiId] = [(actTime, words[1], words[2])]
    inputFile.close()
    return fcdDict
def main():
    print("start program")
    global taxis, edgeDict
    # decide if you want to save charts for every taxi or show a single one
    all = False
    taxiId = "316_3"
    # load data
    edgeDict = load(open(path.edgeLengthDict, "r"))
    taxis = reader.readAnalysisInfo(WEE)
    # reader.readEdgesLength()
    if all:
        plotAllTaxis()
    else:
        plotIt(taxiId)
        show()
    print("end")
def getSpeeds():
    """Reads the speeds from the analysis file"""
    taxis = reader.readAnalysisInfo(WEE)
    # read speeds for every edge
    for taxi in taxis:
        for step in taxi.getSteps():
            if step.source == SOURCE_SIMFCD:
                vtypeEdgeDict.setdefault(step.edge, []).append(float(step.speed))
            elif step.source == SOURCE_FCD:
                fcdEdgeDict.setdefault(step.edge, []).append(float(step.speed))
    # calc avg speed for each edge
    # print fcdEdgeDict["558300689"]
    # print vtypeEdgeDict["558300689"]
    for edge in fcdEdgeDict:
        fcdEdgeDict[edge] = sum(fcdEdgeDict[edge]) / len(fcdEdgeDict[edge])
    print len(fcdEdgeDict)
    for edge in vtypeEdgeDict:
        vtypeEdgeDict[edge] = sum(vtypeEdgeDict[edge]) / len(vtypeEdgeDict[edge])
    print len(vtypeEdgeDict)
def readFCDOLD():
    """Reads the FCD and creates a list of Taxis and for each a list of routes"""
    vlsEdges = reader.readVLS_Edges()
    inputFile = open(path.fcd, 'r')
    for line in inputFile:
        words = line.split("\t")
        # add route
        taxiId = getTaxiId(words[4])
        if taxiId in taxis:
            if words[1] in vlsEdges:
                routes[taxis.index(taxiId)].append(words[1])
            else:
                taxiIdDict[words[4]] += 1
        # if the edge is in the VLS-Area a new route is created
        elif words[1] in vlsEdges:
            taxis.append(taxiId)
            # departTime
            routes.append([getTimeInSecs(words[0]), words[1]])
    inputFile.close()
    print len(taxis)
class RemoteExecutor:
    """
    Execute a command on the remote host through an ssh session.
    exec() also starts three threads that handle the input, error and output
    streams; the other methods can then be used for interacting with the
    process.

        remexecutor.exec('ls -al')  # prints remote home directory contents
    """

    def __init__(self, remotehost):
        """ Initialize the connection."""
        self.__connection = remotehost.connection
        self.__env = remotehost.env
        self.__session = self.__connection.openSession()
        self.__instr = None
        self.__errstr = None
        self.__inputreader = None
        self.__errorreader = None
        self.__outputwriter = None

    def exec(self, command):
        if not self.__connection.isAuthenticationComplete():
            print "Connection not established"
            return
        if self.__session == None:
            self.__session = self.__connection.openSession()
        sess = self.__session
        if type(command) is type([]):
            # if command is a list make it a string
            command = " ".join(command)
        # make environment variables to string and assemble command
        environment = " ".join(["=".join(i) for i in self.__env])
        command = "export " + environment + " && " + command
        sess.execCommand(command)  # execute command
        self.__outputwriter = DataOutputStream(sess.getStdin())
        # start a new thread for the input stream of the process and set the
        # Reader
        self.__instr = StreamGobbler(sess.getStdout())
        self.__inputreader = Reader(BufferedReader(InputStreamReader(self.__instr)))
        # start a new thread for error stream of the process and set the
        # Reader
        self.__errstr = StreamGobbler(sess.getStderr())
        self.__errorreader = Reader(BufferedReader(InputStreamReader(self.__errstr)))

    def input(self):
        """ Function for reading the output of a process.
        Wrapper for Reader readString function. """
        if self.__inputreader is None:
            print "Error __inputstreamer__ is None"
            return
        return self.__inputreader.readString()

    def error(self):
        """ Function for reading the error of a process.
        Wrapper for Reader readString function. """
        if self.__errorreader is None:
            print "Error __errorstreamer__ is None"
            return
        return self.__errorreader.readString()

    def write(self, bytes=None):
        """ Function to read from system in and write to the process input
        (or the proc output) """
        writer = self.__outputwriter
        if bytes is None:
            bytes = raw_input()
        # for i in bytes[:]:
        #     print ord(i)
        writer.writeBytes(bytes + "\n")
        writer.flush()

    def getEnv(self, var):
        env = self.__env
        for i in env:
            if var in i:
                return i[1]

    def setEnv(self, var, value):
        env = self.__env
        curvar = None
        for i in range(len(env)):
            if var in env[i]:
                curvar = env[i][1]
                del env[i]
                break
        self.__env.append((var, value))

    def close(self):
        self.__instr.close()
        self.__errstr.close()
        self.__session.close()
        self.__instr = None
        self.__errstr = None
        self.__session = None
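# A minimal usage sketch for the RemoteExecutor class above (not from the
# original source). It assumes a `remotehost` object that already carries an
# authenticated SSH `connection` and an `env` list of (name, value) pairs, as
# the constructor expects; the host object itself is hypothetical here.
executor = RemoteExecutor(remotehost)
executor.exec("ls -al")      # run the command with the configured environment
print executor.input()       # read what the remote process wrote to stdout
print executor.error()       # read anything the process wrote to stderr
executor.close()             # stop the stream gobblers and close the session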
def get_nonce(noncefile):
    return Reader.read(noncefile, joiner='')
def get_msg(msgfile):
    return Reader.read(msgfile)
def preprocess_dataset(dataset_info): """ Preprocess the file information and insert into the database. The file type could be csv, txt and xls. The file information should hardcord in the config file. The function allow a increnmental way adding information to the database. @param dataset_info: The preprocess dataset information list. Each item in the list is a dictionary which contain the dataset name and all the insert task names. The insert task name should define in the config. @return: None """ for info in dataset_info: dataset_name, insert_tasks = info["dataset"], info["insert_tasks"] # get dataset preprocess config and basic information config = get_preprocess_config(dataset_name, insert_tasks) print("dataset: ", dataset_name) dataset = db[dataset_name] # delete all the data in the current dataset, may uncomment when developing # delete_all_date(dataset) # get all the patient id in the current dataset all_patient_id = { patient_id["patient_id"] for patient_id in query_field(dataset, field={ "_id": 0, "patient_id": 1 }) } # get the raw data for increnmental adding raw_data = { result["patient_id"]: { field: result[field] for field in result if field != "patient_id" } for result in query_field(dataset) } data = defaultdict(lambda: dict()) # for each sub dataset task for insert_task in insert_tasks: # get sub dataset basic information filenames = config[insert_task]["filename"] fields = config[insert_task]["select_column"] # ASSUMPTION: all the insert task has field patient_id and the meaning is the same. # D1NAMO break the assumption and will adhoc get the patient id from file name. patient_idx = sum( [i for i in range(len(fields)) if fields[i] == "patient_id"]) for filename in filenames: # get the file real path file = os.path.join( os.path.join(config["root_dir"], config["dataset"]), filename) print("processing file", file) # ASSUMPTION: all the file type in the insert task is the same. # get the file reader and line count if config[insert_task]["file_type"] == "xls": cnt = line_count_xls(file) readable = Reader( xlrd.open_workbook(file).sheets()[0], config[insert_task]["file_type"]) # file type is txt or csv else: cnt, readable = line_count(file), Reader( open(file), config[insert_task]["file_type"]) # use tqdm to show the process progress with tqdm(total=cnt) as bar: for line_cnt in range(cnt): # get file content line = readable.readline() # if the line is not the header if line_cnt != 0: # get patient_id if dataset_name == "D1NAMO": patient_id = int(file.split("/")[-2]) else: patient_id = str(int(float(line[patient_idx]))) # if the patient id is not in the dataset, add this patient to the database. if patient_id not in all_patient_id: insert_one_data(dataset, {"patient_id": patient_id}) all_patient_id.add(patient_id) # get line timestamp. 
if there is no timestamp, it will be 0 timestamp = 0 if "datetime" in fields: timestamp += sum( datetime_to_int( line[i], config[insert_task] ["basedate"], config[insert_task] ["pattern"]) for i in range(len(fields)) if fields[i] == "datetime") else: if "date" in fields: timestamp += sum( date_to_int( line[i], config[insert_task] ["basedate"], config[insert_task] ["pattern"]) for i in range(len(fields)) if fields[i] == "date") if "timestamp" in fields: timestamp += sum( timestamp_to_int( line[i], config[insert_task] ["pattern"]) for i in range(len(fields)) if fields[i] == "timestamp") row_combine_field = dict() for idx in range(len(line)): if idx >= len(line): continue content, field = line[idx], config[ insert_task]["select_column"][idx] # if the field should not append or there is no content in the line, continue if field == '' or len(content) == 0: continue # if the field is patient_id or timestamp related, continue if field in { "patient_id", "datetime", "date", "timestamp" }: continue # if the field is a status, the field content will not store in list style. if field in status_field_set: # adhoc for field trouble_sleep_inverse if field == "trouble_sleep_inverse": data[patient_id][ "trouble_sleep"] = str( 5 - int(content)) # adhoc for field low_gl elif field == "low_gl": data[patient_id][ "low_gl"] = content.split(" ")[0] else: data[patient_id][field] = content # adhoc for field weight_units (weight should in data before) elif field == "weight_units": if content == "lbs": data[patient_id]["weight"] = str( LBS_TO_KG * float(data[patient_id]["weight"])) # if the field is need store with timestamp elif field in timestamp_field_set: # adhoc for field raw_gl if field == "raw_gl": content = str(float(content) * 18) field = "gl" # if field not in patient's data, initial from raw data in database if field not in data[patient_id]: data[patient_id][field] = \ list() if patient_id not in raw_data or field not in raw_data[patient_id] \ else raw_data[patient_id][field] # append the content with timestamp data[patient_id][field].append( [content, timestamp]) # if the field needs to combine to another field elif field in combine_field_set: combine_field = combine_field_set[field] if combine_field not in row_combine_field: row_combine_field[combine_field] = 0 row_combine_field[combine_field] += float( content) # for the common field, store in list style else: # if field not in patient's data, initial from raw data in database if field not in data[patient_id]: data[patient_id][field] = \ list() if patient_id not in raw_data or field not in raw_data[patient_id] \ else raw_data[patient_id][field] data[patient_id][field].append(content) # ASSUMPTION: the combine field is the common field (not status or store with timestamp) for field in row_combine_field: if field not in data[patient_id]: data[patient_id][field] = list() data[patient_id][field].append( str(row_combine_field[field])) # update the progress bar bar.update() # insert the preprocessed data to the database print("start to insert data to:", dataset_name) start = time.clock() for patient_id in data: for field in data[patient_id]: # update the field in the database update_data(dataset, {"patient_id": patient_id}, {'$set': { field: data[patient_id][field] }}) print("use time to insert:", time.clock() - start)
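# A hedged call sketch for preprocess_dataset() above (not in the original
# source). It only mirrors how the function reads its argument: each entry
# names a dataset collection plus the insert task names defined for it in the
# config file. The task names below are placeholders, not real config keys.
preprocess_dataset([
    {"dataset": "D1NAMO", "insert_tasks": ["glucose", "sleep"]},
])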
parser.add_argument('--source', type=str, default='fgr', help='fgr')
args = parser.parse_args()

if args.dataset is None:
    raise ValueError('Must specify dataset, e.g. redwood or scannet, etc.')
if args.source is None:
    raise ValueError('Must specify input source, e.g. fgr or Super4PCS, etc.')

home = env()
dataset = args.dataset
source = args.source
pathlib.Path('%s/relative_pose/summary/%s/%s' % (home, dataset, source)).mkdir(
    exist_ok=True, parents=True)

reader = Reader()
PATH_SUMMARY = '%s/relative_pose/summary/{}/{}/{}.mat' % home

for sceneid in reader.list_scenes(dataset):
    scanids = reader.get_scanids(dataset, sceneid)
    output_mat = PATH_SUMMARY.format(dataset, source, sceneid)
    if os.path.exists(output_mat):
        continue
    n = len(scanids)
    scanid_map = {str(scanid): i for i, scanid in enumerate(scanids)}
    T = np.zeros((n*4, n*4))
    sigma = np.zeros((n, n))
    aerr = np.zeros((n, n)) + 10000000.0
    terr = np.zeros((n, n)) + 10000000.0
    RLlist = reader.list_relative_poses(dataset, source, sceneid)
def get_number(numfile):
    return Reader.read(numfile, joiner='')

def get_key(keyfile):
    return Reader.read(keyfile, joiner='')
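# The helpers above (get_nonce, get_msg, get_number, get_key) are thin wrappers
# around Reader.read; a hedged equivalence sketch, assuming Reader.read(path,
# joiner=...) joins the file's lines with the given separator. The file names
# here are illustrative only.
key = get_key('key.txt')    # same as Reader.read('key.txt', joiner='')
msg = get_msg('msg.txt')    # keeps Reader.read's default joining behaviour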
class Session(object): def __init__(self, options): self.reader = Reader(options.data_dir, options.data_augment) self.options = options def supervised_enc(self): encoder = self.create_encoder() if os.path.exists(self.options.result_dir + 'model_enc'): self.load_encoder(encoder) enc_trainer = optimizers[self.options.optimizer](encoder.model) lr = self.options.lr #used only for sgd i = 0 best_f1 = 0 print('supervised training for encoder...') for epoch in range(self.options.epochs): sents = 0 total_loss = 0.0 train = self.reader.next_example(0) train_size = len(self.reader.data[0]) for data in train: s1, s2, s3, pos, act = data[0], data[1], data[2], data[ 3], data[4] loss = encoder.train(s1, s2, s3, pos, act, self.options.enc_dropout) sents += 1 if loss is not None: total_loss += loss.scalar_value() loss.backward() if self.options.optimizer == 'sgd': enc_trainer.update(lr) else: enc_trainer.update() e = float(i) / train_size if i % self.options.print_every == 0: print('epoch {}: loss per sentence: {}'.format( e, total_loss / sents)) sents = 0 total_loss = 0.0 if i != 0 and i % self.options.save_every == 0: print('computing loss on validation set...') valid = self.reader.next_example(2) #fix this valid_size = len(self.reader.data[2]) rf = open(self.options.result_dir + 'result', 'w') for vdata in valid: s1, s2, s3, pos, act = vdata[0], vdata[1], vdata[ 2], vdata[3], vdata[4] _, output, _ = encoder.parse(s1, s2, s3, pos) rf.write(output + '\n') rf.close() f1 = compute_eval_score(self.options.result_dir) if f1 > best_f1: best_f1 = f1 print('highest f1: {}'.format(f1)) print('saving model...') encoder.Save(self.options.result_dir + 'model_enc') else: lr = lr * self.options.decay i += 1 def supervised_dec(self): decoder = self.create_decoder() if os.path.exists(self.options.result_dir + 'model_dec'): self.load_decoder(decoder) dec_trainer = optimizers[self.options.optimizer](decoder.model) lr = self.options.lr #used only for sgd i = 0 lowest_valid_loss = 9999 print('supervised training for decoder...') for epoch in range(self.options.epochs): sents = 0 total_loss = 0.0 train = self.reader.next_example(0) train_size = len(self.reader.data[0]) for data in train: s1, s2, s3, pos, act = data[0], data[1], data[2], data[ 3], data[4] loss, loss_act, loss_word = decoder.compute_loss( s3, act, self.options.dec_dropout) sents += 1 if loss is not None: total_loss += loss.scalar_value() loss.backward() if self.options.optimizer == 'sgd': dec_trainer.update(lr) else: dec_trainer.update() e = float(i) / train_size if i % self.options.print_every == 0: print('epoch {}: loss per sentence: {}'.format( e, total_loss / sents)) sents = 0 total_loss = 0.0 if i != 0 and i % self.options.save_every == 0: print('computing loss on validation set...') total_valid_loss = 0 valid = self.reader.next_example(1) valid_size = len(self.reader.data[1]) for vdata in valid: s1, s2, s3, pos, act = vdata[0], vdata[1], vdata[ 2], vdata[3], vdata[4] valid_loss, _, _ = decoder.compute_loss(s3, act) if valid_loss is not None: total_valid_loss += valid_loss.scalar_value() total_valid_loss = total_valid_loss * 1.0 / valid_size if total_valid_loss < lowest_valid_loss: lowest_valid_loss = total_valid_loss print('saving model...') decoder.Save(self.options.result_dir + 'model_dec') else: lr = lr * self.options.decay i += 1 def unsupervised_with_baseline(self): decoder = self.create_decoder() assert (os.path.exists(self.options.result_dir + 'model_dec')) self.load_decoder(decoder) encoder = self.create_encoder() assert 
(os.path.exists(self.options.result_dir + 'model_enc')) self.load_encoder(encoder) baseline = self.create_baseline() if os.path.exists(self.options.result_dir + 'baseline'): self.load_baseline(baseline) enc_trainer = optimizers[self.options.optimizer](encoder.model) dec_trainer = optimizers[self.options.optimizer](decoder.model) baseline_trainer = optimizers[self.options.optimizer](baseline.model) lr = self.options.lr #used only for sgd i = 0 lowest_valid_loss = 9999 print('unsupervised training...') for epoch in range(self.options.epochs): sents = 0 total_loss = 0.0 train = self.reader.next_example(0) train_size = len(self.reader.data[0]) for data in train: s1, s2, s3, pos, act = data[0], data[1], data[2], data[ 3], data[4] sents += 1 # random sample enc_loss_act, _, act = encoder.parse(s1, s2, s3, pos, sample=True) _, dec_loss_act, dec_loss_word = decoder.compute_loss(s3, act) # save reward logpx = -dec_loss_word.scalar_value() total_loss -= logpx # reconstruction and regularization loss backprop to theta_d dec_loss_total = dec_loss_word + dec_loss_act * dy.scalarInput( self.options.dec_reg) dec_loss_total = dec_loss_total * dy.scalarInput( 1.0 / self.options.mcsamples) dec_loss_total.scalar_value() dec_loss_total.backward() # update decoder if self.options.optimizer == 'sgd': dec_trainer.update(lr) else: dec_trainer.update() if self.options.enc_update > 0: # compute baseline and backprop to theta_b b = baseline(s3) logpxb = b.scalar_value() b_loss = dy.squared_distance(b, dy.scalarInput(logpx)) b_loss.value() b_loss.backward() # update baseline if self.options.optimizer == 'sgd': baseline_trainer.update(lr) else: baseline_trainer.update() # policy and and regularization loss backprop to theta_e enc_loss_act = encoder.train(s1, s2, s3, pos, act) enc_loss_policy = enc_loss_act * dy.scalarInput( (logpx - logpxb) / len(s1)) enc_loss_total = enc_loss_policy * dy.scalarInput( self.options.enc_update ) - enc_loss_act * dy.scalarInput(self.options.enc_reg) enc_loss_total = enc_loss_total * dy.scalarInput( 1.0 / self.options.mcsamples) enc_loss_total.value() enc_loss_total.backward() # update encoder if self.options.optimizer == 'sgd': enc_trainer.update(lr) else: enc_trainer.update() e = float(i) / train_size if i % self.options.print_every == 0: print('epoch {}: loss per sentence: {}'.format( e, total_loss / sents)) sents = 0 total_loss = 0.0 if i != 0 and i % self.options.save_every == 0: print('computing loss on validation set...') total_valid_loss = 0 valid = self.reader.next_example(1) valid_size = len(self.reader.data[1]) for vdata in valid: s1, s2, s3, pos, act = vdata[0], vdata[1], vdata[ 2], vdata[3], vdata[4] _, _, valid_word_loss = decoder.compute_loss(s3, act) if valid_word_loss is not None: total_valid_loss += valid_word_loss.scalar_value() total_valid_loss = total_valid_loss * 1.0 / valid_size if total_valid_loss < lowest_valid_loss: lowest_valid_loss = total_valid_loss print('saving model...') encoder.Save(self.options.result_dir + 'model_enc') decoder.Save(self.options.result_dir + 'model_dec') baseline.Save(self.options.result_dir + 'baseline') else: lr = lr * self.options.decay i += 1 def unsupervised_without_baseline(self): decoder = self.create_decoder() assert (os.path.exists(self.options.result_dir + 'model_dec')) self.load_decoder(decoder) encoder = self.create_encoder() assert (os.path.exists(self.options.result_dir + 'model_enc')) self.load_encoder(encoder) enc_trainer = optimizers[self.options.optimizer](encoder.model) dec_trainer = 
optimizers[self.options.optimizer](decoder.model) lr = self.options.lr #used only for sgd i = 0 lowest_valid_loss = 9999 print('unsupervised training...') for epoch in range(self.options.epochs): sents = 0 total_loss = 0.0 train = self.reader.next_example(0) train_size = len(self.reader.data[0]) for data in train: s1, s2, s3, pos, act = data[0], data[1], data[2], data[ 3], data[4] sents += 1 # max sample enc_loss_act, _, act = encoder.parse(s1, s2, s3, pos, sample=False) _, dec_loss_act, dec_loss_word = decoder.compute_loss(s3, act) logpxb = -dec_loss_word.scalar_value() total_loss -= logpxb # random sample enc_loss_act, _, act = encoder.parse(s1, s2, s3, pos, sample=True) _, dec_loss_act, dec_loss_word = decoder.compute_loss(s3, act) # save reward logpx = -dec_loss_word.scalar_value() # reconstruction and regularization loss backprop to theta_d dec_loss_total = dec_loss_word + dec_loss_act * dy.scalarInput( self.options.dec_reg) dec_loss_total = dec_loss_total * dy.scalarInput( 1.0 / self.options.mcsamples) dec_loss_total.scalar_value() dec_loss_total.backward() # update decoder if self.options.optimizer == 'sgd': dec_trainer.update(lr) else: dec_trainer.update() if self.options.enc_update > 0: # policy and and regularization loss backprop to theta_e enc_loss_act = encoder.train(s1, s2, s3, pos, act) enc_loss_policy = enc_loss_act * dy.scalarInput( (logpx - logpxb) / len(s1)) enc_loss_total = enc_loss_policy * dy.scalarInput( self.options.enc_update ) - enc_loss_act * dy.scalarInput(self.options.enc_reg) enc_loss_total = enc_loss_total * dy.scalarInput( 1.0 / self.options.mcsamples) enc_loss_total.value() enc_loss_total.backward() if self.options.optimizer == 'sgd': enc_trainer.update(lr) else: enc_trainer.update() e = float(i) / train_size if i % self.options.print_every == 0: print('epoch {}: loss per sentence: {}'.format( e, total_loss / sents)) sents = 0 total_loss = 0.0 if i != 0 and i % self.options.save_every == 0: print('computing loss on validation set...') total_valid_loss = 0 valid = self.reader.next_example(1) valid_size = len(self.reader.data[1]) for vdata in valid: s1, s2, s3, pos, act = vdata[0], vdata[1], vdata[ 2], vdata[3], vdata[4] _, _, valid_word_loss = decoder.compute_loss(s3, act) if valid_word_loss is not None: total_valid_loss += valid_word_loss.scalar_value() total_valid_loss = total_valid_loss * 1.0 / valid_size if total_valid_loss < lowest_valid_loss: lowest_valid_loss = total_valid_loss print('saving model...') encoder.Save(self.options.result_dir + 'model_enc') decoder.Save(self.options.result_dir + 'model_dec') else: lr = lr * self.options.decay i += 1 def pretrain_baseline(self): baseline = self.create_baseline() if os.path.exists(self.options.result_dir + 'baseline'): self.load_baseline(baseline) baseline_trainer = optimizers[self.options.optimizer](baseline.model) lr = self.options.lr #used only for sgd i = 0 lowest_valid_loss = 9999 print('train baseline, for simplicity use the same data here') for epoch in range(self.options.epochs): sents = 0 total_loss = 0.0 train = self.reader.next_example(0) train_size = len(self.reader.data[0]) for data in train: s1, s2, s3, pos, act = data[0], data[1], data[2], data[ 3], data[4] sents += 1 loss = -baseline(s3) if loss is not None: total_loss += loss.scalar_value() loss.backward() if self.options.optimizer == 'sgd': baseline_trainer.update(lr) else: baseline_trainer.update() e = float(i) / train_size if i % self.options.print_every == 0: print('epoch {}: loss per sentence: {}'.format( e, total_loss / sents)) sents 
= 0 total_loss = 0.0 if i != 0 and i % self.options.save_every == 0: print('computing loss on validation set...') total_valid_loss = 0 valid = self.reader.next_example(1) valid_size = len(self.reader.data[1]) for vdata in valid: s1, s2, s3, pos, act = vdata[0], vdata[1], vdata[ 2], vdata[3], vdata[4] valid_loss = -baseline(s3) if valid_loss is not None: total_valid_loss += valid_loss.scalar_value() total_valid_loss = total_valid_loss * 1.0 / valid_size if total_valid_loss < lowest_valid_loss: lowest_valid_loss = total_valid_loss print('saving model...') baseline.Save(self.options.result_dir + 'baseline') else: lr = lr * self.options.decay i += 1 def parsing(self): decoder = self.create_decoder() assert (os.path.exists(self.options.result_dir + 'model_dec')) self.load_decoder(decoder) encoder = self.create_encoder() assert (os.path.exists(self.options.result_dir + 'model_enc')) self.load_encoder(encoder) print('parsing...') rf = open(os.path.join(self.options.result_dir, 'result'), 'w') test = self.reader.next_example(2) p = Parser(encoder, decoder) for dataid, data in enumerate(test): s1, s2, s3, pos, act = data[0], data[1], data[2], data[3], data[4] output = p(s1, s2, s3, pos, self.options.nsamples) rf.write(output + '\n') rf.close() f1 = compute_eval_score(self.options.result_dir) print('bracket F1 score is {}'.format(f1)) def language_modeling(self): decoder = self.create_decoder() assert (os.path.exists(self.options.result_dir + 'model_dec')) self.load_decoder(decoder) encoder = self.create_encoder() assert (os.path.exists(self.options.result_dir + 'model_enc')) self.load_encoder(encoder) print('computing language model score...') test = self.reader.next_example(2) lm = LanguageModel(encoder, decoder) total_ll = 0 total_tokens = 0 for dataid, data in enumerate(test): s1, s2, s3, pos, act = data[0], data[1], data[2], data[3], data[4] if len(s1) <= 1: continue total_ll += lm(s1, s2, s3, pos, self.options.nsamples) total_tokens += len(s1) perp = compute_perplexity(total_ll, total_tokens) print('perplexity: {}'.format(perp)) def create_decoder(self): return Decoder(self.reader, self.options.nlayers, self.options.word_dim, self.options.pretrained_dim, self.options.action_dim, self.options.dec_lstm_dim, self.options.embedding_file) def create_encoder(self): return Encoder(self.reader, self.options.nlayers, self.options.word_dim, self.options.pretrained_dim, self.options.pos_dim, self.options.action_dim, self.options.enc_lstm_dim, self.options.embedding_file) def create_baseline(self): baseline = None if self.options.baseline == 'rnnlm': baseline = LanguageModelBaseline(self.reader, self.options.word_dim, self.options.pretrained_dim, self.options.dec_lstm_dim, self.options.embedding_file) elif self.options.baseline == 'rnnauto': baseline = RNNAutoencBaseline(self.reader, self.options.word_dim, self.options.pretrained_dim, self.options.dec_lstm_dim, self.options.embedding_file) elif self.options.baseline == 'mlp': baseline = MLPAutoencBaseline(self.reader, self.options.word_dim, self.options.pretrained_dim, self.options.embedding_file) else: raise NotImplementedError("Baseline Not Implmented") return baseline def load_decoder(self, decoder): decoder.Load(self.options.result_dir + 'model_dec') def load_encoder(self, encoder): encoder.Load(self.options.result_dir + 'model_enc') def load_baseline(self, baseline): baseline.Load(self.options.result_dir + 'baseline')
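# A minimal driver sketch for the Session class above (not from the original
# source). It assumes `options` is an argparse-style namespace carrying the
# fields the class reads (data_dir, data_augment, result_dir, optimizer, lr,
# epochs, ...); the `mode` switch below is illustrative only.
session = Session(options)
if options.mode == 'supervised':
    session.supervised_enc()
    session.supervised_dec()
elif options.mode == 'unsupervised':
    session.pretrain_baseline()
    session.unsupervised_with_baseline()
elif options.mode == 'parse':
    session.parsing()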