示例#1
0
def processLine(line, tlds):
  """Turn one pipe-delimited input line into a flat record.

  The line is split on "|". Field 0 is taken as the URL and handed to
  ``parseUrl`` together with ``tlds``; field 5 is taken as the timestamp.

  Args:
    line: A "|"-separated string with at least six fields.
    tlds: Passed through unchanged as the second argument of ``parseUrl``.

  Returns:
    An 11-item list: the timestamp followed by the first ten items of the
    ``parseUrl`` result, which this code treats as tld, hostname, port,
    subdomain and path1..path6, in that order. The URL itself is not part of
    the record.

  Raises:
    IndexError: If the line has fewer than six fields, or the parsed URL has
      fewer than ten items (assuming ``parseUrl`` returns a list or tuple).
  """
  pieces = line.split("|")
  parsed = parseUrl(pieces[0], tlds)

  # The record starts with the timestamp, then the URL components by position.
  record = [pieces[5]]
  for position in range(10):
    record.append(parsed[position])
  return record
示例#2
0
def processOneUrl(url):
  """Run the model on a single URL and print a table of its predictions.

  The URL is broken into components with ``parseUrl`` and submitted to
  ``model.run`` along with the current local time. The predictions stored
  under keys 1 and 3 of ``result.inferences['multiStepPredictions']`` are
  passed to ``printTopNPredictions`` (labelled "next" and "third", with a
  count of 3 each), and the resulting text table is printed to stdout.

  ``tlds`` and ``model`` are not parameters; they are read from the enclosing
  scope, as are ``parseUrl``, ``printTopNPredictions`` and ``texttable``.

  Args:
    url: The URL to feed to the model.

  Returns:
    None. The only output is the printed table.
  """
  # "%S" is the zero-padded seconds field. The earlier "%s" is not a standard
  # strftime directive: glibc expands it to seconds since the epoch and other
  # platforms reject or drop it, so the timestamp came out malformed.
  timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  urlFields = parseUrl(url, tlds)

  # Model input keys: timestamp, url, then the parseUrl items by position.
  fieldNames = ("tld", "hostname", "port", "subdomain",
                "path1", "path2", "path3", "path4", "path5", "path6")
  modelInput = dict(timestamp=timestamp, url=url)
  for index, name in enumerate(fieldNames):
    modelInput[name] = urlFields[index]

  result = model.run(modelInput)
  table = texttable.Texttable()
  # Keys 1 and 3 presumably are "steps ahead" -- confirm against the model's
  # configured prediction steps.
  predictions = result.inferences['multiStepPredictions']
  oneStepPredictions = predictions[1]
  threeStepPredictions = predictions[3]
  printTopNPredictions(oneStepPredictions, "next", table, 3)
  printTopNPredictions(threeStepPredictions, "third", table, 3)
  # Parenthesised single-argument form prints the same on Python 2 and 3.
  print(table.draw())