def processLine(line, tlds):
    """Turn one pipe-delimited input line into a flat row of URL features.

    The first "|"-separated column is taken as the URL and the sixth column
    as the timestamp. The URL is handed to parseUrl together with ``tlds``,
    and the first ten items of its result are appended after the timestamp.

    Returns a list laid out as:
        [timestamp, tld, hostname, port, subdomain,
         path1, path2, path3, path4, path5, path6]

    The raw URL itself is not part of the returned row.

    Raises IndexError if the line has fewer than six columns or parseUrl
    yields fewer than ten items.
    """
    columns = line.split("|")
    # parseUrl runs before the timestamp column is read.
    parsed = parseUrl(columns[0], tlds)
    row = [columns[5]]
    # Index explicitly (rather than slicing) so a short parse result still
    # raises instead of silently producing a truncated row.
    row.extend(parsed[position] for position in range(10))
    return row
def processOneUrl(url):
    """Run a single URL through the model and print its top predictions.

    The URL is split into fields with parseUrl (using the module-level
    ``tlds``), combined with the current local time into an input record,
    and passed to the module-level ``model``. The one-step-ahead and
    three-step-ahead entries of the result's 'multiStepPredictions'
    inference are then passed to printTopNPredictions (top 3 each, labelled
    "next" and "third") along with a shared text table, which is finally
    drawn to stdout.

    Args:
        url: the URL string to evaluate.

    Returns:
        None; output goes to stdout.

    Raises IndexError if parseUrl yields fewer than ten items.
    """
    # "%S" is the seconds field. The previous "%s" is not a documented
    # Python directive: on glibc it expands to seconds since the epoch and
    # elsewhere it is unsupported, so the timestamp was malformed.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    urlFields = parseUrl(url, tlds)

    # Record keys: timestamp, url, tld, hostname, port, subdomain, path1..path6
    modelInput = dict(
        timestamp=timestamp,
        url=url,
        tld=urlFields[0],
        hostname=urlFields[1],
        port=urlFields[2],
        subdomain=urlFields[3],
        path1=urlFields[4],
        path2=urlFields[5],
        path3=urlFields[6],
        path4=urlFields[7],
        path5=urlFields[8],
        path6=urlFields[9],
    )
    result = model.run(modelInput)

    table = texttable.Texttable()
    multiStep = result.inferences['multiStepPredictions']
    oneStepPredictions = multiStep[1]
    threeStepPredictions = multiStep[3]
    # NOTE(review): printTopNPredictions presumably adds rows to ``table``;
    # confirm against its definition.
    printTopNPredictions(oneStepPredictions, "next", table, 3)
    printTopNPredictions(threeStepPredictions, "third", table, 3)
    # Single-argument parenthesized form prints identically under the
    # Python 2 print statement and the print function.
    print(table.draw())