示例#1
0
def main():
    """Parse every paragraph listed in a JSON-lines file and print the results.

    The input path is taken from ``sys.argv[1]``. Each non-blank line is
    decoded as JSON and iterated; every element is handed to
    ``exsto.parse_graf`` and each item that call produces is printed after
    being formatted by ``exsto.pretty_print``.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
    """
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Iterate the file object directly so input is streamed line by line
        # instead of being loaded into memory all at once by readlines().
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line is not a JSON document.
                continue

            for graf_text in json.loads(line):
                for sent in exsto.parse_graf(graf_text):
                    # A parenthesised single argument prints identically
                    # whether print is a statement or a function.
                    print(exsto.pretty_print(sent))
示例#2
0
文件: filter.py 项目: aimran/exsto
def main():
  """Run the quote filter over a directory or a JSON-lines file.

  The path is taken from ``sys.argv[1]``. When it names a directory it is
  passed to ``exsto.test_filter``. Otherwise it is opened as a text file:
  each non-blank line is decoded as JSON, its ``"text"`` entry is passed
  through ``exsto.filter_quotes``, and the result is printed after being
  formatted by ``exsto.pretty_print``.

  Raises:
    IndexError: if no path was supplied on the command line.
    ValueError: if a non-blank line is not valid JSON.
    KeyError: if a decoded record has no ``"text"`` entry.
  """
  path = sys.argv[1]

  if os.path.isdir(path):
    exsto.test_filter(path)
    return

  with open(path, 'r') as f:
    # Stream the file line by line rather than materialising every line
    # up front with readlines().
    for line in f:
      if not line.strip():
        # A blank (e.g. trailing) line is not a JSON document.
        continue

      meta = json.loads(line)
      # A parenthesised single argument prints identically whether print
      # is a statement or a function.
      print(exsto.pretty_print(exsto.filter_quotes(meta["text"])))
示例#3
0
def main():
    """Run the quote filter over a directory or a JSON-lines file.

    The path is taken from ``sys.argv[1]``. A directory is passed to
    ``exsto.test_filter``. Any other path is opened as a text file whose
    non-blank lines are each decoded as JSON; the record's ``"text"`` entry
    is passed through ``exsto.filter_quotes`` and the result is printed
    after being formatted by ``exsto.pretty_print``.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record has no ``"text"`` entry.
    """
    path = sys.argv[1]

    if os.path.isdir(path):
        exsto.test_filter(path)
        return

    with open(path, 'r') as f:
        # Iterating the file object streams the input; readlines() would
        # hold every line in memory before the first one is processed.
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line is not a JSON document.
                continue

            meta = json.loads(line)
            filtered = exsto.filter_quotes(meta["text"])
            # A parenthesised single argument prints identically whether
            # print is a statement or a function.
            print(exsto.pretty_print(filtered))
示例#4
0
def main():
    """Filter and parse each record of a JSON-lines file, printing the output.

    The input path is taken from ``sys.argv[1]``. Each non-blank line is
    decoded as JSON; the record's ``"text"`` entry is passed through
    ``exsto.filter_quotes`` and every element of that result is parsed with
    ``exsto.parse_graf`` together with the record's ``"id"``. Each parsed
    item is printed after being formatted by ``exsto.pretty_print``.

    An ``IndexError`` raised while handling one paragraph is swallowed so
    the remaining paragraphs are still processed; when the module-level
    ``DEBUG`` flag is truthy the error and the offending paragraph are
    printed.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``"text"`` or ``"id"``.
    """
    # DEBUG is only read here, so no ``global`` declaration is required.
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Stream the file instead of loading every line with readlines().
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line is not a JSON document.
                continue

            meta = json.loads(line)

            for graf_text in exsto.filter_quotes(meta["text"]):
                # The whole loop stays inside the try block so an error
                # raised while iterating the parse results is handled too.
                try:
                    for sent in exsto.parse_graf(meta["id"], graf_text):
                        print(exsto.pretty_print(sent))
                except IndexError as e:
                    if DEBUG:
                        print("IndexError: " + str(e))
                        print(graf_text)
示例#5
0
文件: etl.py 项目: aimran/exsto
def main():
  """Filter and parse each record of a JSON-lines file, printing the output.

  The input path is taken from ``sys.argv[1]``. For every non-blank line the
  JSON record's ``"text"`` entry is passed through ``exsto.filter_quotes``;
  each resulting paragraph is parsed by ``exsto.parse_graf`` along with the
  record's ``"id"``, and every parsed item is printed after being formatted
  by ``exsto.pretty_print``.

  An ``IndexError`` raised while handling a paragraph does not stop the
  run: it is reported (with the paragraph text) only when the module-level
  ``DEBUG`` flag is truthy, and processing continues with the next one.

  Raises:
    IndexError: if no path was supplied on the command line.
    ValueError: if a non-blank line is not valid JSON.
    KeyError: if a record lacks ``"text"`` or ``"id"``.
  """
  # DEBUG is only read here, so no ``global`` declaration is required.
  path = sys.argv[1]

  with open(path, 'r') as f:
    # Iterate the file lazily; readlines() would buffer the entire input.
    for line in f:
      if not line.strip():
        # A blank (e.g. trailing) line is not a JSON document.
        continue

      meta = json.loads(line)

      for graf_text in exsto.filter_quotes(meta["text"]):
        # Keep the iteration inside the try block so errors raised while
        # consuming the parse results are handled as well.
        try:
          for sent in exsto.parse_graf(meta["id"], graf_text):
            print(exsto.pretty_print(sent))
        except IndexError as e:
          if DEBUG:
            print("IndexError: " + str(e))
            print(graf_text)
示例#6
0
def main():
  """Parse the paragraphs of each JSON-lines record and print the results.

  The input path is taken from ``sys.argv[1]``. For every non-blank line the
  JSON record's ``"text"`` entry is passed through ``exsto.filter_quotes``.
  Each resulting paragraph is given to ``exsto.parse_graf`` together with
  the record's ``"id"`` and a running ``base`` value; that call returns the
  parsed items plus the ``base`` to use for the next paragraph. ``base``
  restarts at 0 for every record. Each parsed item is printed after being
  formatted by ``exsto.pretty_print``.

  When the module-level ``DEBUG`` flag is truthy, the raw paragraph text is
  printed before it is parsed.

  Raises:
    IndexError: if no path was supplied on the command line.
    ValueError: if a non-blank line is not valid JSON.
    KeyError: if a record lacks ``"text"`` or ``"id"``.
  """
  # DEBUG is only read here, so no ``global`` declaration is required.
  path = sys.argv[1]

  with open(path, 'r') as f:
    # Stream the file instead of loading every line with readlines().
    for line in f:
      if not line.strip():
        # A blank (e.g. trailing) line is not a JSON document.
        continue

      meta = json.loads(line)
      base = 0

      for graf_text in exsto.filter_quotes(meta["text"]):
        if DEBUG:
          print(graf_text)

        # Thread the returned base straight into the next call.
        grafs, base = exsto.parse_graf(meta["id"], graf_text, base)

        for graf in grafs:
          print(exsto.pretty_print(graf))
示例#7
0
def main():
  """Scrape a chain of pages and write one formatted record per line.

  Settings come from the ``[scraper]`` section of ``defaults.cfg``:
  ``iterations`` (maximum number of pages), ``nap_time`` (value passed to
  ``time.sleep`` after each page), ``base_url`` and ``start_url`` (joined to
  form the first URL). The output path is taken from ``sys.argv[1]`` and is
  opened for writing, replacing any existing content.

  For each page, ``exsto.scrape_url`` fetches the URL and
  ``exsto.parse_email`` turns the result into a record; the record is
  written via ``exsto.pretty_print`` followed by a newline, and its
  ``"next_url"`` entry becomes the next URL. The loop ends after
  ``iterations`` pages or as soon as there is no next URL.

  Raises:
    IndexError: if no output path was supplied on the command line.
    KeyError: if a parsed record has no ``"next_url"`` entry.
  """
  config = ConfigParser.ConfigParser()
  config.read("defaults.cfg")

  iterations = config.getint("scraper", "iterations")
  nap_time = config.getint("scraper", "nap_time")
  base_url = config.get("scraper", "base_url")
  url = base_url + config.get("scraper", "start_url")

  with open(sys.argv[1], 'w') as f:
    for _ in xrange(iterations):
      # Truthiness covers the empty string and also a None next_url,
      # which would make len() raise TypeError.
      if not url:
        break

      root = exsto.scrape_url(url)
      meta = exsto.parse_email(root, base_url)

      f.write(exsto.pretty_print(meta))
      f.write('\n')

      url = meta["next_url"]
      # Pause between requests.
      time.sleep(nap_time)