Пример #1
0
def main():
  """Convert the raw input file into a sanitized CSV file.

  Loads the TLD list, then reads ``rawFile`` line by line, passes every line
  through ``processLine`` and writes the returned fields as one CSV row to
  ``sanitizedFile``. ``appendHeader`` is called with the writer once, before
  any data row is written.

  Both files are opened in a ``with`` statement so they are closed (and the
  output flushed) even if ``appendHeader``, ``processLine`` or a write
  raises part-way through.
  """
  tlds = loadTlds()

  # Input is opened before output, matching the original open order; the
  # context manager closes both handles on every exit path.
  with open(rawFile) as inFile, open(sanitizedFile, "w") as outFile:
    writer = csv.writer(outFile)
    appendHeader(writer)
    for line in inFile:
      writer.writerow(processLine(line, tlds))
Пример #2
0
from nupic.frameworks.opf.modelfactory import ModelFactory
from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager
from urls import loadTlds
from urls import parseUrl
import datetime
import texttable


# Module-level setup. Everything below runs at import time.
# NOTE(review): `os`, `logging` and `cmd` are used in this file but are not
# among the imports visible here -- confirm they are imported elsewhere.

# Directory containing this script; realpath() resolves symlinks first.
scriptDir = os.path.dirname(os.path.realpath(__file__))
# Logger named after this module.
_LOGGER = logging.getLogger(__name__)
# Path to ../data/sanitized.csv, relative to this script's directory.
_DATA_PATH = os.path.join(scriptDir, "..", "data", "sanitized.csv")
# Path to ../savedModel/checkpoint, relative to this script's directory.
_MODEL_PATH = os.path.join(scriptDir, "..", "savedModel", "checkpoint")
# The model is restored from the checkpoint as a side effect of importing
# this module.
model = ModelFactory.loadFromCheckpoint(_MODEL_PATH)
# Learning is switched off right after loading -- presumably so that
# interactive queries do not modify the saved model; TODO confirm.
model.disableLearning()

# TLD data shared by the whole module; processOneUrl() passes it to parseUrl().
tlds = loadTlds()


class UrlShell(cmd.Cmd):
    """Interactive prompt that passes each entered line to processOneUrl()."""

    # Banner printed once when the command loop starts.
    intro = "Enter URL to predict next hostname.\n"
    # Prompt string shown before every input line.
    prompt = "url> "
    file = None

    def default(self, line):
        """Treat any line that matches no ``do_*`` command as a URL."""
        # The result is deliberately not returned: cmd.Cmd stops its loop
        # when a handler returns a true value.
        processOneUrl(line)

def processOneUrl(url): 
      timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%s")
      urlFields = parseUrl(url, tlds)

      # Create dict: timestamp,tld,hostname,port,subdomain,path1,path2,path3,path4,path5,path6